<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR AI</journal-id>
      <journal-title>JMIR AI</journal-title>
      <issn pub-type="epub">2817-1705</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v4i1e66926</article-id>
      <article-id pub-id-type="pmid">40460418</article-id>
      <article-id pub-id-type="doi">10.2196/66926</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>High-Throughput Phenotyping of the Symptoms of Alzheimer Disease and Related Dementias Using Large Language Models: Cross-Sectional Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Liu</surname>
            <given-names>Hongfang</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Chiu</surname>
            <given-names>Hung-Wen</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Qi</surname>
            <given-names>Wenhao</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Cheng</surname>
            <given-names>You</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3141-0104</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Malekar</surname>
            <given-names>Mrunal</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0004-4709-1998</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>He</surname>
            <given-names>Yingnan</given-names>
          </name>
          <degrees>MS, MPH</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0003-6082-3893</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Bommareddy</surname>
            <given-names>Apoorva</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0003-8920-8608</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Magdamo</surname>
            <given-names>Colin</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8965-4630</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Singh</surname>
            <given-names>Arjun</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0005-4370-3077</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Westover</surname>
            <given-names>Brandon</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4803-312X</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Mukerji</surname>
            <given-names>Shibani S</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5677-6954</ext-link>
        </contrib>
        <contrib id="contrib9" contrib-type="author">
          <name name-style="western">
            <surname>Dickson</surname>
            <given-names>John</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0135-7928</ext-link>
        </contrib>
        <contrib id="contrib10" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Das</surname>
            <given-names>Sudeshna</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Neurology</institution>
            <institution>Massachusetts General Hospital</institution>
            <addr-line>65 Landsdowne St</addr-line>
            <addr-line>Cambridge, MA, 02139</addr-line>
            <country>United States</country>
            <phone>1 617 768 8254</phone>
            <email>SDAS5@mgh.harvard.edu</email>
          </address>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9486-6811</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Neurology</institution>
        <institution>Massachusetts General Hospital</institution>
        <addr-line>Cambridge, MA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Harvard Medical School</institution>
        <addr-line>Boston, MA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Department of Neurology</institution>
        <institution>Beth Israel Hospital Boston</institution>
        <addr-line>Boston, MA</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Sudeshna Das <email>SDAS5@mgh.harvard.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2025</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>3</day>
        <month>6</month>
        <year>2025</year>
      </pub-date>
      <volume>4</volume>
      <elocation-id>e66926</elocation-id>
      <history>
        <date date-type="received">
          <day>26</day>
          <month>9</month>
          <year>2024</year>
        </date>
        <date date-type="rev-request">
          <day>14</day>
          <month>3</month>
          <year>2025</year>
        </date>
        <date date-type="rev-recd">
          <day>21</day>
          <month>4</month>
          <year>2025</year>
        </date>
        <date date-type="accepted">
          <day>11</day>
          <month>5</month>
          <year>2025</year>
        </date>
      </history>
      <copyright-statement>©You Cheng, Mrunal Malekar, Yingnan He, Apoorva Bommareddy, Colin Magdamo, Arjun Singh, Brandon Westover, Shibani S Mukerji, John Dickson, Sudeshna Das. Originally published in JMIR AI (https://ai.jmir.org), 03.06.2025.</copyright-statement>
      <copyright-year>2025</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR AI, is properly cited. The complete bibliographic information, a link to the original publication on https://www.ai.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://ai.jmir.org/2025/1/e66926" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Alzheimer disease and related dementias (ADRD) are complex disorders with overlapping symptoms and pathologies. Comprehensive records of symptoms in electronic health records (EHRs) are critical for not only reaching an accurate diagnosis but also supporting ongoing research studies and clinical trials. However, these symptoms are frequently obscured within unstructured clinical notes in EHRs, making manual extraction both time-consuming and labor-intensive.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>We aimed to automate symptom extraction from the clinical notes of patients with ADRD using fine-tuned large language models (LLMs), compare its performance to regular expression-based symptom recognition, and validate the results using brain magnetic resonance imaging (MRI) data.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We fine-tuned LLMs to extract ADRD symptoms across the following 7 domains: memory, executive function, motor, language, visuospatial, neuropsychiatric, and sleep. We assessed the algorithm’s performance by calculating the area under the receiver operating characteristic curve (AUROC) for each domain. The extracted symptoms were then validated in two analyses: (1) predicting ADRD diagnosis using the counts of extracted symptoms and (2) examining the association between ADRD symptoms and MRI-derived brain volumes.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Symptom extraction across the 7 domains achieved high accuracy with AUROCs ranging from 0.97 to 0.99. Using the counts of extracted symptoms to predict ADRD diagnosis yielded an AUROC of 0.83 (95% CI 0.77-0.89). Symptom associations with brain volumes revealed that a smaller hippocampal volume was linked to memory impairments (odds ratio 0.62, 95% CI 0.46-0.84; <italic>P</italic>=.006), and reduced pallidum size was associated with motor impairments (odds ratio 0.73, 95% CI 0.58-0.90; <italic>P</italic>=.04).</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>These results highlight the accuracy and reliability of our high-throughput ADRD phenotyping algorithm. By enabling automated symptom extraction, our approach has the potential to assist with differential diagnosis, as well as facilitate clinical trials and research studies of dementia.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>electronic health record</kwd>
        <kwd>Alzheimer disease and related dementias</kwd>
        <kwd>large language model</kwd>
        <kwd>disease phenotyping</kwd>
        <kwd>symptom extraction</kwd>
        <kwd>differential diagnosis</kwd>
        <kwd>brain volume</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Alzheimer disease and related dementias (ADRD) encompass a group of disorders characterized by cognitive and behavioral impairments, which progressively affect memory, thinking, and activities of daily living [<xref ref-type="bibr" rid="ref1">1</xref>]. Among them, Alzheimer disease (AD) is the most common form of dementia and affects approximately 6.7 million individuals in the United States [<xref ref-type="bibr" rid="ref1">1</xref>]. Other major types of ADRD include dementia with Lewy bodies (DLB), frontotemporal dementia (FTD; behavioral variant), Parkinson disease (PD), primary progressive aphasia (PPA), and vascular cognitive impairment (VCI), each presenting unique symptom profiles with overlapping characteristics. For example, AD typically presents with memory loss [<xref ref-type="bibr" rid="ref2">2</xref>]; DLB with visual hallucinations, motor symptoms, and sleep disturbances [<xref ref-type="bibr" rid="ref3">3</xref>]; FTD with behavioral and language symptoms [<xref ref-type="bibr" rid="ref4">4</xref>]; and PD with motor symptoms [<xref ref-type="bibr" rid="ref5">5</xref>]. However, clinical presentations and symptoms vary with neuropathology, which contributes to diagnostic challenges. Documentation of ADRD symptoms often exists solely within unstructured clinical notes in electronic health records (EHRs) without any standardization, and manual chart review is error prone and time consuming. The development of an artificial intelligence algorithm for automatic symptom extraction from clinical notes could significantly aid in overcoming these challenges, thereby offering substantial benefits for diagnosis and intervention strategies. Additionally, the symptom data in clinical notes have the potential to facilitate research studies, for example, studies of the longitudinal progression of symptoms in patients with ADRD or how symptoms are documented, shedding light on both medical patterns and recording practices [<xref ref-type="bibr" rid="ref6">6</xref>].</p>
      <p>Symptom extraction is often performed by manual expert chart review, which is inefficient and labor intensive. Traditional text mining and natural language processing (NLP) techniques, which rely on symptom-related keywords specified by domain experts [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>], can facilitate the symptom extraction process. For example, Vijayakrishnan et al [<xref ref-type="bibr" rid="ref9">9</xref>] developed a rule-based NLP pipeline to identify heart failure symptoms using the Framingham heart failure diagnostic criteria. Jackson et al [<xref ref-type="bibr" rid="ref10">10</xref>] created a unified NLP model for extracting severe mental illness symptoms based on a keyword lexicon crafted by psychiatrists. Moreover, Forsyth et al [<xref ref-type="bibr" rid="ref11">11</xref>] developed a machine learning model to extract breast cancer symptoms based on a code book developed by physicians. However, these rule-based or keyword-dependent methods are still susceptible to missing semantic relationships and contextual information.</p>
      <p>In contrast to traditional NLP techniques, the advent of deep learning–based large language transformer models [<xref ref-type="bibr" rid="ref12">12</xref>-<xref ref-type="bibr" rid="ref14">14</xref>] presents a significant improvement by understanding contextual information and semantic relationships in clinical notes. In particular, large language models (LLMs) are adept at recognizing complex patterns and relationships within texts using an attention-based transformer model [<xref ref-type="bibr" rid="ref15">15</xref>]. For example, a recent study used LLMs to extract cannabis use and documentation in EHRs among children and young adults [<xref ref-type="bibr" rid="ref16">16</xref>]. In another study, researchers created an LLM-based symptom extraction model that can be applied to extract COVID-19 symptoms from Twitter data [<xref ref-type="bibr" rid="ref17">17</xref>]. Indeed, by understanding the context of keywords and terminologies, these models can enable more accurate and sensitive symptom extraction.</p>
      <p>In this study, we used LLMs [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref18">18</xref>] to extract symptoms from the clinical notes of patients diagnosed with ADRD. Symptoms were categorized into 7 domains: <italic>memory</italic>, <italic>executive function</italic>, <italic>motor</italic>, <italic>language</italic>, <italic>visuospatial</italic>, <italic>neuropsychiatric</italic>, and <italic>sleep</italic>, with distinction as impaired, intact, or no information. This method quantified symptom occurrences for further analysis. The overall aim was to develop an effective model for automated symptom extraction, which may not only facilitate the differential diagnosis of ADRD (AD, DLB, FTD, PD, PPA, and VCI), but also support research on heterogeneity within these subtypes. To evaluate the effectiveness of our LLM-based approach, we compared it against a traditional rule-based method using regular expressions for symptom extraction. We further validated the model’s symptom predictions using brain volume data derived from magnetic resonance imaging (MRI).</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Study Dataset</title>
        <p>The dataset consisted of the EHR data of patients from the Massachusetts General Hospital (MGH) memory clinic (collected between 2015 and 2022), who were over 50 years old at their first visit and had at least two MGH memory clinic encounters. The dataset was further filtered to exclude patients without an office or telemedicine visit or those who did not have a progress note with at least 512 characters. The final dataset was filtered to only include patients with 1 of 6 ADRD diagnoses during their latest encounter: AD, DLB, FTD, PD, PPA, or VCI, and without mixed dementia in their EHR history. See <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> for the full list of diagnosis names by ADRD category.</p>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>This study was approved by the Mass General Brigham Institutional Review Board (protocol 2015P001915), with a waiver of informed consent granted for secondary analysis of electronic health records. No participant compensation was provided. Data were extracted from Epic and securely stored on servers within the Mass General Brigham firewall, with access limited to authorized study personnel in accordance with institutional privacy and data security policies.</p>
      </sec>
      <sec>
        <title>Preprocessing</title>
        <p>To process the notes, we applied <italic>medspaCy</italic>, a specialized text analysis tool for clinical notes [<xref ref-type="bibr" rid="ref19">19</xref>]. We extracted key sections of the notes that held important information regarding the patient’s symptoms such as medical history, examination, and impression. The extraction tool was customized for each physician’s template. Subsequently, we sampled notes based on ADRD diagnoses and split notes into sentences or phrases for symptom annotation.</p>
      </sec>
      <sec>
        <title>Annotation</title>
        <p>An expert (AB) conducted a thorough review of the medical literature and identified symptoms from seven domains typically present in patients living with ADRD: (1) memory, (2) executive function, (3) motor, (4) language, (5) visuospatial, (6) neuropsychiatric (which also incorporates symptoms related to behavior and mood), and (7) sleep (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>). A behavioral neurologist (JD) provided critical input throughout both processes. Subsequently, another expert (MM) annotated sentences or phrases as <italic>symptom</italic> (patient shows intact or impaired symptoms) or <italic>no symptom</italic> (no information on patient symptoms). Further, MM annotated sentences or phrases as <italic>intact</italic>, <italic>impaired</italic>, or <italic>no information</italic> for each of the 7 symptom domains, using a web-based JavaScript annotation tool developed by AS. Using these annotations, we created 2 gold standard datasets: <italic>gold standard dataset I</italic> (composed of sentences or phrases labeled as <italic>symptom</italic> or <italic>no symptom</italic>) and <italic>gold standard dataset II</italic> (composed of sentences or phrases labeled as <italic>intact</italic>, <italic>impaired</italic>, or <italic>no information</italic> across the 7 symptom domains). The process for creating the gold standard dataset is illustrated in <xref rid="figure1" ref-type="fig">Figure 1</xref>A.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Model development and architecture. (A) Gold standard dataset creation and model development. This workflow describes the development of a 2-tier hierarchical model to classify symptoms in clinical notes. Initially, 1712 memory clinic notes are processed, and sentences sampled across various Alzheimer disease and related dementias (ADRD) diagnoses are manually annotated using a web tool, producing 2 gold standard datasets: one identifying symptom presence, and another categorizing symptom status across 7 domains. The 2 classification models, built on BioBERT, undergo fine-tuning using 80% of the data and testing using 20% of a held-out dataset. (B) Illustration of the application of BioBERT in stage I and stage II models for symptom extraction. dx: diagnosis.</p>
          </caption>
          <graphic xlink:href="ai_v4i1e66926_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Symptom Recognition Using BioBERT</title>
        <p>We developed a 2-tier hierarchical model for symptom extraction. The <italic>stage I binary symptom classification model</italic> classified each input sentence as <italic>symptom</italic> or <italic>no symptom</italic>. The <italic>stage II multi-label symptom classification model</italic> is composed of 7 distinct models, with each trained to classify sentences or phrases from 1 of the 7 symptom domains, namely <italic>memory</italic>, <italic>executive function</italic>, <italic>motor</italic>, <italic>language</italic>, <italic>visuospatial</italic>, <italic>neuropsychiatric</italic>, and <italic>sleep</italic>. Each <italic>stage II multi-label symptom classification model</italic> classifies sentences or phrases into 3 categories: <italic>impaired</italic>, <italic>intact</italic>, and <italic>no information</italic>. The <italic>impaired</italic> category encapsulates symptoms indicative of impairment within the specific domain, highlighting manifestations of dysfunction. Conversely, the <italic>intact</italic> category encompasses symptoms that reflect normal functioning of the respective symptom domain. The <italic>no information</italic> category encompasses all remaining symptoms from other categories (eg, a sentence that only mentions a <italic>motor</italic> symptom is categorized as <italic>no information</italic> in the <italic>memory</italic> model), supplemented by nonsymptomatic sentences.</p>
        <p>Both the <italic>stage I binary symptom classification model</italic> and <italic>stage II multi-label symptom classification model</italic> were developed using <italic>BioBERT</italic> [<xref ref-type="bibr" rid="ref20">20</xref>], an LLM pretrained on a large corpus of biomedical text (eg, PubMed abstracts and PubMed Central full-text articles) and implemented using HuggingFace’s <italic>Python</italic> <italic>transformers</italic> package (version 4.8.2) [<xref ref-type="bibr" rid="ref21">21</xref>]. The <italic>stage I binary symptom classification model</italic> was initialized with the pretrained parameters of BioBERT and then fine-tuned on the <italic>gold standard dataset I</italic> (80% training set, 20% hold-out set). The <italic>stage II multi-label symptom classification model</italic> was again initialized with pretrained parameters and later fine-tuned on <italic>gold standard dataset II</italic> (80% training set, 20% hold-out set). <italic>Optuna</italic> hyperparameter tuning was used to tune the hyperparameters for both models, including training epochs, batch size, and learning rate, with a 20-trial study to maximize the area under the precision-recall curve. An early stopping criterion was implemented to cease training if the loss did not change substantially in 4 epochs, preventing overfitting.</p>
        <p><xref rid="figure1" ref-type="fig">Figure 1</xref>B shows how we used BioBERT for the stage I and stage II models. We used the pretrained BioBERT model as a starting point and fine-tuned it for our task. As shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>B, the extracted sentences are first processed through the BioBERT tokenizer, which splits the raw text into tokens. For example, the sentence “Patient has difficulty walking” is tokenized. Then, each token is converted into a pretrained embedding, capturing the semantic meaning of the word in the context of the sentence, along with a position embedding that encodes the token’s location within the sequence to help the model understand word order and structure. A [CLS] token is added at the beginning of each sentence. Its embedding is used to represent the aggregated meaning of the entire sentence. A [SEP] token is placed at the end to signify the boundary between input tokens. E (embedding) from 1 to n represents the token embeddings, with the total count of n including [CLS] and [SEP]. These embeddings are passed through BioBERT’s transformer layers, which use self-attention and feed-forward neural networks to generate context-aware embeddings. As the sentence passes through the layers, the embedding of the [CLS] token becomes enriched with contextualized information derived from the full sentence, which represents the overall meaning of the input. Finally, the embedding of the [CLS] token is used as the input for the linear layer, which calculates the logits for each class. Sigmoid (for binary classification) or SoftMax (for multi-class classification) as a decision function is applied to these logits to obtain class probabilities, and the class with the highest probability is selected as the model’s predicted label. We fine-tuned BioBERT separately for stage I (binary classification) using gold standard dataset I and for stage II (multi-label classification) using gold standard dataset II. The fine-tuning process primarily involves adjusting the parameters of the BioBERT transformer layers and the linear layer to optimize performance for each stage’s specific classification task.</p>
        <p>We also experimented with other pretrained models as part of our preliminary experiments, including ClinicalBERT, RoBERTa, and LLaMA 2, with the latter being a generative transformer model. Despite fine-tuning (for ClinicalBERT and RoBERTa) or prompt engineering (for LLaMA 2), the models did not achieve the same level of performance as BioBERT in symptom classification based on the area under the receiver operating characteristic curve (AUROC) and <italic>F</italic><sub>1</sub>-score. All text processing and LLM development procedures were conducted in <italic>Python</italic> (version 3.8.15).</p>
      </sec>
      <sec>
        <title>Symptom Recognition Using Regular Expressions</title>
        <p>We created a list of regex patterns for ADRD symptoms to compare the efficacy of our advanced LLM approach with the traditional rule-based regex technique. First, 100 patient visit notes across the 6 ADRD diagnoses (AD, DLB, FTD, PD, PPA, and VCI) were randomly sampled. These notes were analyzed to identify examples from each of the 7 symptom domains (<italic>memory</italic>, <italic>executive function</italic>, <italic>motor</italic>, <italic>language</italic>, <italic>visuospatial</italic>, <italic>neuropsychiatric</italic>, and <italic>sleep</italic>) and develop a comprehensive set of regex patterns for each symptom domain. An expert behavioral neurologist (JD) provided critical guidance throughout this process. Next, these regex patterns were used to flag sentences or phrases corresponding to each symptom domain in the entire set of visit notes. The symptom counts for each note were then aggregated to calculate the total number of matches for each domain. For the full list of regex patterns, please see <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>.</p>
      </sec>
      <sec>
        <title>Validation via ADRD Differential Diagnosis</title>
        <p>We compiled symptom counts across 7 domains (<italic>memory</italic>, <italic>executive function</italic>, <italic>motor</italic>, <italic>language</italic>, <italic>visuospatial</italic>, <italic>neuropsychiatric</italic>, and <italic>sleep</italic>) based on predictions of our 2-tier hierarchical model on the entire set of visit notes. These symptom counts served as input features for a multinomial L1-regularized logistic regression model to classify 6 ADRD diagnoses (AD, DLB, FTD, PD, PPA, and VCI). To optimize the model, we employed 5-fold cross-validation and grid search cross-validation to determine the optimal value of alpha for L1 regularization using the <italic>Python</italic> <italic>scikit-learn</italic> (version 0.24.2) package. Additionally, we incorporated the aggregated symptom counts, derived from applying the ADRD symptom regex patterns (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>) on the same dataset, as features in the machine learning model. We hypothesized that symptoms identified with our 2-tier hierarchical model would perform better than those derived from regex patterns in predicting ADRD diagnoses. All ADRD differential diagnosis analyses were conducted in <italic>Python</italic> (version 3.8.15).</p>
      </sec>
      <sec>
        <title>Validation via MRI Brain Volume Data</title>
        <p>To evaluate symptom predictions using MRI, we selected memory clinic notes with an MRI scan performed within 1 year of the visit. We ensured that none of these notes overlapped with the gold standard datasets. Each clinical note was matched with a unique MRI scan from the Mass General Brigham patient database, with the imaging date being within 1 year of the visit date. The <italic>SynthSeg+</italic> pipeline [<xref ref-type="bibr" rid="ref22">22</xref>] was used for brain segmentation and volume estimation. Only those images whose subcortical regions collectively surpassed a threshold of 0.65 in the average automated quality control score were selected for further analysis. For patients with multiple eligible clinical images, the final brain volume was determined by averaging the volumes across all qualifying images. Furthermore, to account for individual differences, the volume of each brain region was normalized by the intracranial volume.</p>
        <p>In our brain volume analysis, we first selected <italic>a priori</italic> brain regions associated with 2 of the most commonly disrupted functions in patients with ADRD: <italic>memory</italic> and <italic>motor</italic>. For <italic>memory</italic> symptoms, we investigated the bilateral hippocampus and entorhinal cortex, both associated with the memory of recent events, as well as the prefrontal cortex, which is related to immediate memory [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref23">23</xref>-<xref ref-type="bibr" rid="ref25">25</xref>]. For <italic>motor</italic> symptoms, our evaluation encompassed the bilateral primary motor cortex, the secondary motor cortex, the basal ganglia (including the caudate, putamen, pallidum, and nucleus accumbens) along with the thalamus (a structure with strong connections to the basal ganglia), and the cerebellar gray and white matter [<xref ref-type="bibr" rid="ref26">26</xref>-<xref ref-type="bibr" rid="ref29">29</xref>].</p>
        <p>Logistic regression was used to evaluate the volumes of brain regions associated with symptoms, with a contrast of cases having <italic>impaired</italic> symptoms and those having either <italic>intact</italic> symptoms or <italic>no information</italic>. The analysis was conducted for both <italic>memory</italic> and <italic>motor</italic> symptoms, with adjustments made for age and sex, using the function <italic>glm</italic> in the <italic>R stats</italic> (version 4.3.2) package. The reported results were adjusted for multiple comparisons using the Benjamini-Hochberg method [<xref ref-type="bibr" rid="ref30">30</xref>]. All MRI brain volume analyses were conducted in <italic>R</italic> (version 4.2.1; R Core Team). For a detailed workflow of validation using MRI, see Figure S1 in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Study Data</title>
        <p>The study data consisted of visit notes from the latest encounters of 1712 patients (<xref rid="figure2" ref-type="fig">Figure 2</xref>). The visit notes were from 866 (50.6%) male and 846 (49.4%) female patients, with a mean age at visit of 77.5 (SD 8.3) years. All patients had 1 of the following ADRD diagnoses: AD, DLB, FTD, PD, PPA, or VCI. The patient demographics are described in <xref ref-type="table" rid="table1">Table 1</xref>.</p>
        <p>From these 1712 visit notes, we compiled 2 gold standard datasets. Gold standard dataset I included 10,089 sentences or phrases labeled as <italic>symptom</italic> (n=5468, 54.2%) or <italic>no symptom</italic> (n=4621, 45.8%). Gold standard dataset II included 6784 sentences or phrases labeled as <italic>intact</italic>, <italic>impaired</italic>, or <italic>no information</italic> across the 7 symptom domains. The ADRD diagnoses in dataset II predominantly included AD (2862/6784, 42.2%) and DLB (1866/6784, 27.5%), followed by FTD (879/6784, 13.0%), PD (628/6784, 9.3%), VCI (479/6784, 7.1%), and PPA (70/6784, 1.0%). Specifically, AD had the highest counts for <italic>memory</italic> and <italic>visuospatial</italic> symptoms; DLB led in <italic>executive function</italic> symptoms; PD was predominant in <italic>motor</italic> symptoms; PPA led in <italic>language</italic> symptoms; and FTD was notable for <italic>neuropsychiatric</italic> and <italic>sleep</italic> symptoms, with high counts also noted in <italic>visuospatial</italic> and <italic>sleep</italic> symptoms for VCI and DLB, respectively (refer to <xref ref-type="table" rid="table2">Table 2</xref> for detailed distributions). A standardized mean difference (SMD) threshold of 0.1 was used to assess the balance of each characteristic, with values exceeding 0.1 indicating a comparative lack of balance. The MRI validation dataset included 582 visit notes from 528 unique patients who had clinical MRI performed within 1 year of the visit (Figure S2 in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>). For demographic distribution related to these visit notes, refer to the last column of <xref ref-type="table" rid="table1">Table 1</xref>.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>CONSORT diagram of the selection of patients with Alzheimer disease and related dementias (ADRD). This CONSORT diagram illustrates the patient selection process from the Massachusetts General Hospital (MGH) memory clinic. dx: diagnosis.</p>
          </caption>
          <graphic xlink:href="ai_v4i1e66926_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Summary statistics of the demographic and clinical characteristics of 1712 patients, including a subset of 582 visits from 528 patients with valid magnetic resonance imaging data.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="530"/>
            <col width="220"/>
            <col width="220"/>
            <thead>
              <tr valign="bottom">
                <td colspan="2">Characteristic</td>
                <td>Total sample (N=1712)</td>
                <td>MRI<sup>a</sup> sample (n=582)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="2">Age at visit (years), mean (SD)</td>
                <td>77.5 (8.3)</td>
                <td>76.3 (7.3)</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Sex, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>846 (49.4)</td>
                <td>279 (47.9)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>866 (50.6)</td>
                <td>303 (52.1)</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Race and ethnicity, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Non-Hispanic White</td>
                <td>1317 (76.9)</td>
                <td>459 (78.9)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Non-Hispanic Black</td>
                <td>40 (2.3)</td>
                <td>10 (1.7)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Non-Hispanic Asian</td>
                <td>42 (2.5)</td>
                <td>16 (2.7)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Hispanic or Latino</td>
                <td>54 (3.2)</td>
                <td>20 (3.4)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>American Indian or Alaska Native</td>
                <td>3 (0.2)</td>
                <td>1 (0.2)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Other</td>
                <td>25 (1.5)</td>
                <td>9 (1.5)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Unavailable</td>
                <td>231 (13.5)</td>
                <td>67 (11.5)</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Visit diagnosis, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Alzheimer disease</td>
                <td>1117 (65.2)</td>
                <td>378 (64.9)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Dementia with Lewy bodies</td>
                <td>143 (8.4)</td>
                <td>44 (7.6)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Frontotemporal dementia</td>
                <td>195 (11.4)</td>
                <td>67 (11.5)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Parkinson disease</td>
                <td>53 (3.1)</td>
                <td>15 (2.6)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Primary progressive aphasia</td>
                <td>89 (5.2)</td>
                <td>24 (4.1)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Vascular cognitive impairment</td>
                <td>115 (6.7)</td>
                <td>54 (9.3)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>MRI: magnetic resonance imaging.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Summary statistics of gold standard dataset II.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="140"/>
            <col width="0"/>
            <col width="110"/>
            <col width="0"/>
            <col width="110"/>
            <col width="0"/>
            <col width="110"/>
            <col width="0"/>
            <col width="110"/>
            <col width="0"/>
            <col width="110"/>
            <col width="0"/>
            <col width="110"/>
            <col width="0"/>
            <col width="110"/>
            <col width="0"/>
            <col width="0"/>
            <col width="60"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Characteristic</td>
                <td colspan="2">Total (N=6784)</td>
                <td colspan="2">AD<sup>a</sup> (n=2862)</td>
                <td colspan="2">DLB<sup>b</sup> (n=1866)</td>
                <td colspan="2">FTD<sup>c</sup> (n=879)</td>
                <td colspan="2">PD<sup>d</sup> (n=628)</td>
                <td colspan="2">PPA<sup>e</sup> (n=70)</td>
                <td colspan="2">VCI<sup>f</sup> (n=479)</td>
                <td colspan="2">SMD<sup>g</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="3">Age at visit (years), mean (SD)</td>
                <td colspan="2">77.7 (7.9)</td>
                <td colspan="2">79.9 (7.4)</td>
                <td colspan="2">74.8 (7.4)</td>
                <td colspan="2">75.8 (7.3)</td>
                <td colspan="2">75.0 (7.7)</td>
                <td colspan="2">72.4 (7.3)</td>
                <td colspan="2">83.6 (7.0)</td>
                <td colspan="2">0.661<sup>h</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="18">
                  <bold>Sex, n (%)</bold>
                </td>
                <td>0.540<sup>h</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td colspan="2">3221 (47.5)</td>
                <td colspan="2">1637 (57.2)</td>
                <td colspan="2">468 (25.1)</td>
                <td colspan="2">650 (73.9)</td>
                <td colspan="2">142 (22.6)</td>
                <td colspan="2">43 (61.4)</td>
                <td colspan="2">281 (58.7)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td colspan="2">3563 (52.5)</td>
                <td colspan="2">1225 (42.8)</td>
                <td colspan="2">1398 (74.9)</td>
                <td colspan="2">229 (26.1)</td>
                <td colspan="2">486 (77.4)</td>
                <td colspan="2">27 (38.6)</td>
                <td colspan="2">198 (41.3)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="18">
                  <bold>Race and ethnicity, n (%)</bold>
                </td>
                <td>0.844<sup>h</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Non-Hispanic White</td>
                <td colspan="2">4613 (68.0)</td>
                <td colspan="2">1948 (68.1)</td>
                <td colspan="2">1498 (80.3)</td>
                <td colspan="2">251 (28.6)</td>
                <td colspan="2">485 (77.2)</td>
                <td colspan="2">68 (97.1)</td>
                <td colspan="2">363 (75.8)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Non-Hispanic Black</td>
                <td colspan="2">61 (0.9)</td>
                <td colspan="2">11 (0.4)</td>
                <td colspan="2">15 (0.8)</td>
                <td colspan="2">19 (2.2)</td>
                <td colspan="2">0 (0.0)</td>
                <td colspan="2">0 (0.0)</td>
                <td colspan="2">16 (3.3)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Non-Hispanic Asian</td>
                <td colspan="2">213 (3.1)</td>
                <td colspan="2">109 (3.8)</td>
                <td colspan="2">48 (2.6)</td>
                <td colspan="2">7 (0.8)</td>
                <td colspan="2">41 (6.5)</td>
                <td colspan="2">0 (0.0)</td>
                <td colspan="2">8 (1.7)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Hispanic or Latino</td>
                <td colspan="2">353 (5.2)</td>
                <td colspan="2">324 (11.3)</td>
                <td colspan="2">0 (0.0)</td>
                <td colspan="2">0 (0.0)</td>
                <td colspan="2">18 (2.9)</td>
                <td colspan="2">0 (0.0)</td>
                <td colspan="2">11 (2.3)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>American Indian or Alaska Native</td>
                <td colspan="2">0 (0.0)</td>
                <td colspan="2">0 (0.0)</td>
                <td colspan="2">0 (0.0)</td>
                <td colspan="2">0 (0.0)</td>
                <td colspan="2">0 (0.0)</td>
                <td colspan="2">0 (0.0)</td>
                <td colspan="2">0 (0.0)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Other</td>
                <td colspan="2">64 (0.9)</td>
                <td colspan="2">64 (2.2)</td>
                <td colspan="2">0 (0.0)</td>
                <td colspan="2">0 (0.0)</td>
                <td colspan="2">0 (0.0)</td>
                <td colspan="2">0 (0.0)</td>
                <td colspan="2">0 (0.0)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Unavailable</td>
                <td colspan="2">1480 (21.8)</td>
                <td colspan="2">406 (14.2)</td>
                <td colspan="2">305 (16.3)</td>
                <td colspan="2">602 (68.5)</td>
                <td colspan="2">84 (13.4)</td>
                <td colspan="2">2 (2.9)</td>
                <td colspan="2">81 (16.9)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="18">
                  <bold>Memory, n (%)</bold>
                </td>
                <td>0.275<sup>h</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Impaired</td>
                <td colspan="2">767 (11.3)</td>
                <td colspan="2">493 (17.2)</td>
                <td colspan="2">143 (7.7)</td>
                <td colspan="2">33 (3.8)</td>
                <td colspan="2">29 (4.6)</td>
                <td colspan="2">5 (7.1)</td>
                <td colspan="2">64 (13.4)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Intact</td>
                <td colspan="2">219 (3.2)</td>
                <td colspan="2">98 (3.4)</td>
                <td colspan="2">23 (1.2)</td>
                <td colspan="2">49 (5.6)</td>
                <td colspan="2">15 (2.4)</td>
                <td colspan="2">2 (2.9)</td>
                <td colspan="2">32 (6.7)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>No information</td>
                <td colspan="2">5798 (85.5)</td>
                <td colspan="2">2271 (79.4)</td>
                <td colspan="2">1700 (91.1)</td>
                <td colspan="2">797 (90.7)</td>
                <td colspan="2">584 (93.0)</td>
                <td colspan="2">63 (90.0)</td>
                <td colspan="2">383 (80.0)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="18">
                  <bold>Executive function, n (%)</bold>
                </td>
                <td>0.173<sup>h</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Impaired</td>
                <td colspan="2">797 (11.7)</td>
                <td colspan="2">371 (13.0)</td>
                <td colspan="2">256 (13.7)</td>
                <td colspan="2">43 (4.9)</td>
                <td colspan="2">68 (10.8)</td>
                <td colspan="2">5 (7.1)</td>
                <td colspan="2">54 (11.3)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Intact</td>
                <td colspan="2">240 (3.5)</td>
                <td colspan="2">118 (4.1)</td>
                <td colspan="2">70 (3.8)</td>
                <td colspan="2">13 (1.5)</td>
                <td colspan="2">16 (2.5)</td>
                <td colspan="2">2 (2.9)</td>
                <td colspan="2">21 (4.4)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>No information</td>
                <td colspan="2">5747 (84.7)</td>
                <td colspan="2">2373 (82.9)</td>
                <td colspan="2">1540 (82.5)</td>
                <td colspan="2">823 (93.6)</td>
                <td colspan="2">544 (86.6)</td>
                <td colspan="2">63 (90.0)</td>
                <td colspan="2">404 (84.3)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="18">
                  <bold>Motor, n (%)</bold>
                </td>
                <td>0.562<sup>h</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Impaired</td>
                <td colspan="2">1202 (17.7)</td>
                <td colspan="2">321 (11.2)</td>
                <td colspan="2">555 (29.7)</td>
                <td colspan="2">32 (3.6)</td>
                <td colspan="2">236 (37.6)</td>
                <td colspan="2">8 (11.4)</td>
                <td colspan="2">50 (10.4)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Intact</td>
                <td colspan="2">792 (11.7)</td>
                <td colspan="2">300 (10.5)</td>
                <td colspan="2">246 (13.2)</td>
                <td colspan="2">65 (7.4)</td>
                <td colspan="2">117 (18.6)</td>
                <td colspan="2">20 (28.6)</td>
                <td colspan="2">44 (9.2)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>No information</td>
                <td colspan="2">4790 (70.6)</td>
                <td colspan="2">2241 (78.3)</td>
                <td colspan="2">1065 (57.1)</td>
                <td colspan="2">782 (89.0)</td>
                <td colspan="2">275 (43.8)</td>
                <td colspan="2">42 (60.0)</td>
                <td colspan="2">385 (80.4)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="18">
                  <bold>Language, n (%)</bold>
                </td>
                <td>0.345<sup>h</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Impaired</td>
                <td colspan="2">545 (8.0)</td>
                <td colspan="2">214 (7.5)</td>
                <td colspan="2">89 (4.8)</td>
                <td colspan="2">167 (19.0)</td>
                <td colspan="2">31 (4.9)</td>
                <td colspan="2">19 (27.1)</td>
                <td colspan="2">25 (5.2)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Intact</td>
                <td colspan="2">263 (3.9)</td>
                <td colspan="2">104 (3.6)</td>
                <td colspan="2">54 (2.9)</td>
                <td colspan="2">54 (6.1)</td>
                <td colspan="2">22 (3.5)</td>
                <td colspan="2">5 (7.1)</td>
                <td colspan="2">24 (5.0)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>No information</td>
                <td colspan="2">5976 (88.1)</td>
                <td colspan="2">2544 (88.9)</td>
                <td colspan="2">1723 (92.3)</td>
                <td colspan="2">658 (74.9)</td>
                <td colspan="2">575 (91.6)</td>
                <td colspan="2">46 (65.7)</td>
                <td colspan="2">430 (89.8)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="18">
                  <bold>Visuospatial, n (%)</bold>
                </td>
                <td>0.154<sup>h</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Impaired</td>
                <td colspan="2">359 (5.3)</td>
                <td colspan="2">196 (6.8)</td>
                <td colspan="2">90 (4.8)</td>
                <td colspan="2">11 (1.3)</td>
                <td colspan="2">31 (4.9)</td>
                <td colspan="2">2 (2.9)</td>
                <td colspan="2">29 (6.1)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Intact</td>
                <td colspan="2">153 (2.3)</td>
                <td colspan="2">69 (2.4)</td>
                <td colspan="2">29 (1.6)</td>
                <td colspan="2">20 (2.3)</td>
                <td colspan="2">18 (2.9)</td>
                <td colspan="2">1 (1.4)</td>
                <td colspan="2">16 (3.3)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>No information</td>
                <td colspan="2">6272 (92.5)</td>
                <td colspan="2">2597 (90.7)</td>
                <td colspan="2">1747 (93.6)</td>
                <td colspan="2">848 (96.5)</td>
                <td colspan="2">579 (92.2)</td>
                <td colspan="2">67 (95.7)</td>
                <td colspan="2">434 (90.6)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="18">
                  <bold>Neuropsychiatric, n (%)</bold>
                </td>
                <td>0.453<sup>h</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Impaired</td>
                <td colspan="2">740 (10.9)</td>
                <td colspan="2">274 (9.6)</td>
                <td colspan="2">162 (8.7)</td>
                <td colspan="2">236 (26.8)</td>
                <td colspan="2">25 (4.0)</td>
                <td colspan="2">1 (1.4)</td>
                <td colspan="2">42 (8.8)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Intact</td>
                <td colspan="2">644 (9.5)</td>
                <td colspan="2">331 (11.6)</td>
                <td colspan="2">110 (5.9)</td>
                <td colspan="2">97 (11.0)</td>
                <td colspan="2">16 (2.5)</td>
                <td colspan="2">4 (5.7)</td>
                <td colspan="2">86 (18.0)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>No information</td>
                <td colspan="2">5400 (79.6)</td>
                <td colspan="2">2257 (78.9)</td>
                <td colspan="2">1594 (85.4)</td>
                <td colspan="2">546 (62.1)</td>
                <td colspan="2">587 (93.5)</td>
                <td colspan="2">65 (92.9)</td>
                <td colspan="2">351 (73.3)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="18">
                  <bold>Sleep, n (%)</bold>
                </td>
                <td>0.246<sup>h</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Impaired</td>
                <td colspan="2">333 (4.9)</td>
                <td colspan="2">98 (3.4)</td>
                <td colspan="2">125 (6.7)</td>
                <td colspan="2">76 (8.6)</td>
                <td colspan="2">25 (4.0)</td>
                <td colspan="2">0 (0.0)</td>
                <td colspan="2">9 (1.9)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Intact</td>
                <td colspan="2">157 (2.3)</td>
                <td colspan="2">74 (2.6)</td>
                <td colspan="2">41 (2.2)</td>
                <td colspan="2">16 (1.8)</td>
                <td colspan="2">9 (1.4)</td>
                <td colspan="2">0 (0.0)</td>
                <td colspan="2">17 (3.5)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>No information</td>
                <td colspan="2">6294 (92.8)</td>
                <td colspan="2">2690 (94.0)</td>
                <td colspan="2">1700 (91.1)</td>
                <td colspan="2">787 (89.5)</td>
                <td colspan="2">594 (94.6)</td>
                <td colspan="2">70 (100.0)</td>
                <td colspan="2">453 (94.6)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>AD: Alzheimer disease.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>DLB: dementia with Lewy bodies.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>FTD: frontotemporal dementia.</p>
            </fn>
            <fn id="table2fn4">
              <p><sup>d</sup>PD: Parkinson disease.</p>
            </fn>
            <fn id="table2fn5">
              <p><sup>e</sup>PPA: primary progressive aphasia.</p>
            </fn>
            <fn id="table2fn6">
              <p><sup>f</sup>VCI: vascular cognitive impairment.</p>
            </fn>
            <fn id="table2fn7">
              <p><sup>g</sup>SMD: standardized mean difference.</p>
            </fn>
            <fn id="table2fn8">
              <p><sup>h</sup>Indicates comparative lack of balance.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Symptom Recognition Using a Transformer-Based Language Model</title>
        <p>We trained, validated, and tested a transformer-based LLM to identify symptoms related to ADRD diagnoses. The symptom extraction process was executed through a 2-stage framework. The stage I binary symptom classification model categorized sentences as either <italic>symptom</italic> or <italic>no symptom</italic>. The model attained a micro-averaged AUROC of 1.00 (95% CI 0.99-1.00), along with a micro-averaged <italic>F</italic><sub>1</sub>-score of 0.98 (95% CI 0.97-0.98), micro-averaged precision of 0.98 (95% CI 0.97-0.98), and micro-averaged recall of 0.98 (95% CI 0.97-0.98), highlighting its ability to accurately detect symptom presence. The 95% CIs for each metric reflect the reliability of these estimates, confirming the model’s overall efficacy in symptom classification across diverse clinical features.</p>
        <p>This initial classification was followed by the stage II multi-label symptom classification models, which further classified each detected symptom as <italic>impaired</italic>, <italic>intact</italic>, or <italic>no information</italic>. The 7 stage II models were tailored to each specific domain (<italic>memory</italic>, <italic>executive function</italic>, <italic>motor</italic>, <italic>language</italic>, <italic>visuospatial</italic>, <italic>neuropsychiatric</italic>, and <italic>sleep</italic>). All symptom domains showed robust model performance, with micro-averaged AUROC values of 0.97-0.99, micro-averaged <italic>F</italic><sub>1</sub>-score values of 0.89-0.96, micro-averaged precision values of 0.87-0.96, and micro-averaged recall values of 0.91-0.96 across all symptoms. Among these, we observed slightly lower metrics in the visuospatial domain (micro-averaged AUROC: 0.97, 95% CI 0.95-0.99; micro-averaged <italic>F</italic><sub>1</sub>-score: 0.89, 95% CI 0.85-0.93; micro-averaged precision: 0.87, 95% CI 0.83-0.91; micro-averaged recall: 0.91, 95% CI 0.87-0.94). <xref ref-type="table" rid="table3">Table 3</xref> provides a comprehensive evaluation of the performance metrics for both models.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Performance of the 2-tier hierarchical symptom classification model.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="170"/>
            <col width="140"/>
            <col width="130"/>
            <col width="130"/>
            <col width="130"/>
            <col width="130"/>
            <col width="140"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Model</td>
                <td><italic>F</italic><sub>1</sub>-score<sup>a</sup>, value (95% CI)</td>
                <td>AUPRC<sup>a,b</sup>, value (95% CI)</td>
                <td>Precision<sup>a</sup>, value (95% CI)</td>
                <td>Recall<sup>a</sup>, value (95% CI)</td>
                <td>AUROC<sup>a,c</sup>, value (95% CI)</td>
                <td>Accuracy<sup>a</sup>, value (95% CI)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="2">Stage I binary symptom classification model</td>
                <td>0.98 (0.97-0.98)</td>
                <td>1.00 (0.99-1.00)</td>
                <td>0.98 (0.97-0.98)</td>
                <td>0.98 (0.97-0.98)</td>
                <td>1.00 (0.99-1.00)</td>
                <td>0.98 (0.97-0.98)</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Stage II multi-label symptom classification model</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Memory</td>
                <td>0.96 (0.94-0.98)</td>
                <td>0.94 (0.91-0.96)</td>
                <td>0.96 (0.95-0.98)</td>
                <td>0.95 (0.94-0.97)</td>
                <td>0.99 (0.98-1.00)</td>
                <td>0.94 (0.92-0.96)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Executive function</td>
                <td>0.91 (0.88-0.94)</td>
                <td>0.85 (0.82-0.89)</td>
                <td>0.90 (0.87-0.92)</td>
                <td>0.92 (0.90-0.95)</td>
                <td>0.98 (0.97-0.99)</td>
                <td>0.87 (0.84-0.90)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Motor</td>
                <td>0.94 (0.92-0.96)</td>
                <td>0.90 (0.87-0.92)</td>
                <td>0.93 (0.91-0.95)</td>
                <td>0.94 (0.92-0.96)</td>
                <td>0.98 (0.97-0.99)</td>
                <td>0.93 (0.91-0.95)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Language</td>
                <td>0.93 (0.92-0.96)</td>
                <td>0.97 (0.97-0.99)</td>
                <td>0.93 (0.91-0.96)</td>
                <td>0.93 (0.92-0.96)</td>
                <td>0.98 (0.96-0.99)</td>
                <td>0.91 (0.88-0.94)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Visuospatial</td>
                <td>0.89 (0.85-0.93)</td>
                <td>0.82 (0.78-0.87)</td>
                <td>0.87 (0.83-0.91)</td>
                <td>0.91 (0.87-0.94)</td>
                <td>0.97 (0.95-0.99)</td>
                <td>0.82 (0.78-0.87)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Neuropsychiatric</td>
                <td>0.91 (0.89-0.95)</td>
                <td>0.94 (0.91-0.96)</td>
                <td>0.91 (0.88-0.94)</td>
                <td>0.92 (0.89-0.94)</td>
                <td>0.99 (0.98-1.00)</td>
                <td>0.90 (0.87-0.93)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Sleep</td>
                <td>0.96 (0.94-0.98)</td>
                <td>0.94 (0.91-0.96)</td>
                <td>0.96 (0.94-0.98)</td>
                <td>0.96 (0.94-0.98)</td>
                <td>0.99 (0.98-1.00)</td>
                <td>0.95 (0.92-0.98)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>The performance metrics for both models are calculated as micro-averages.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>AUPRC: area under the precision-recall curve.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>AUROC: area under the receiver operating characteristic curve.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Model Validation With ADRD Differential Diagnosis</title>
        <p>To validate the accuracy of our 2-tier hierarchical symptom classification model, we used a machine learning model to classify ADRD diagnoses with the counts of identified symptoms as model features. We compared 2 L1-regularized logistic regression models: one based on regex-derived symptom counts and another using counts derived from the 2-tier hierarchical LLM. This method allowed us to assess the efficacy of traditional regex techniques against more advanced LLM approaches in the context of ADRD diagnostic accuracy.</p>
        <p>First, we predicted ADRD diagnoses using L1 logistic regression based on regex-derived symptom counts. Using regex patterns, we extracted symptom counts from the latest visit notes of 1712 patients diagnosed with ADRD, spanning 7 domains: memory, executive function, motor, language, visuospatial, neuropsychiatric, and sleep. These counts were used to build an L1-regularized multinomial logistic regression model, which predicted the type of ADRD diagnosis using symptom counts as features. The model’s average AUROC was 0.59 (95% CI 0.51-0.66). Detailed AUROC values for each ADRD diagnosis relative to the rest are displayed in <xref rid="figure3" ref-type="fig">Figure 3</xref>A.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Performance of Alzheimer disease and related dementias (ADRD) differential diagnosis. (A) Receiver operating characteristic (ROC) curves for predicting 6 ADRD diagnoses (Alzheimer disease [AD], dementia with Lewy bodies [DLB], frontotemporal dementia [FTD], Parkinson disease [PD], primary progressive aphasia [PPA], and vascular cognitive impairment [VCI]) using an L1-regularized logistic regression model based on regex-derived symptom counts. The area under the receiver operating characteristic curve (AUROC) is 0.59 (95% CI 0.51-0.66). (B) ROC curves for an L1-regularized logistic regression model using 2-tier hierarchical large language model (LLM)-derived symptom counts. The AUROC is 0.83 (95% CI 0.77-0.89). (C) Feature importance ranking for the model using LLM-derived symptom counts, with an average across the coefficients of symptoms in all ADRD diagnoses. Executive function is the most important feature, followed by language, motor, memory, neuropsychiatric, visuospatial, and sleep. AUC: area under the curve.</p>
          </caption>
          <graphic xlink:href="ai_v4i1e66926_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>Second, we predicted ADRD diagnoses using L1 logistic regression based on LLM symptom counts. The second model, leveraging symptom counts extracted from patient visit notes via the 2-tier hierarchical LLM, aimed to predict specific ADRD diagnoses using L1-regularized logistic regression. This model demonstrated a substantial enhancement in diagnostic accuracy, achieving an AUROC of 0.83 (95% CI 0.77-0.89) compared to the AUROC of 0.59 (95% CI 0.51-0.66) obtained with the regex-based model. This marked improvement highlights the model’s efficacy in accurately classifying ADRD categories, underscoring the potential of transformer-based BioBERT models in capturing the context of clinical symptoms from notes. The detailed AUROC for each diagnosis compared to the rest is displayed in <xref rid="figure3" ref-type="fig">Figure 3</xref>B.</p>
        <p>Further, analysis using feature importance derived from the LLM-based logistic regression model showed that <italic>executive function</italic> had the greatest predictive power on average, followed by <italic>language</italic>, <italic>motor</italic>, <italic>memory</italic>, <italic>neuropsychiatric</italic>, <italic>visuospatial</italic>, and <italic>sleep</italic>. This ranking, illustrated in <xref rid="figure3" ref-type="fig">Figure 3</xref>C, emphasizes the critical roles of <italic>executive function</italic>, <italic>language</italic>, <italic>memory</italic>, and <italic>motor</italic> symptoms in predicting ADRD diagnoses. Feature importance rankings for each ADRD diagnosis are illustrated in Figure S3 in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>.</p>
      </sec>
      <sec>
        <title>Model Validation With Brain MRI</title>
        <p>We used MRI brain volume data to assess our model’s ability to identify symptoms from clinical notes. We hypothesized that the volumes of selected brain regions associated with each domain would be smaller in patients with impaired symptoms predicted from the notes compared to those without. The model analyzed 582 sentences or phrases, identifying memory impairment in 90.7% (528/582) and motor impairment in 80.6% (469/582) of cases. In particular, we observed that memory-impaired individuals showed smaller hippocampal and prefrontal cortex volumes (SMDs &#62;0.1), while motor-impaired individuals had reduced volumes in subcortical regions, including the thalamus, putamen, pallidum, and accumbens area (SMDs &#62;0.1). For brain volume summary statistics from <italic>memory</italic> and <italic>motor</italic> BioBERT model predictions, see <xref rid="figure4" ref-type="fig">Figure 4</xref>A.</p>
        <p>The <italic>memory</italic> model predicted that visit notes of patients with AD had the highest proportion (93.7%) of <italic>memory</italic> symptoms relative to the other ADRD diagnoses, which is consistent with our understanding that memory impairment is the initial and primary symptom for most patients with AD [<xref ref-type="bibr" rid="ref2">2</xref>] (<xref rid="figure4" ref-type="fig">Figure 4</xref>B). The MRI analysis of <italic>memory</italic> symptoms revealed that a smaller hippocampal volume was associated with an increased likelihood of memory impairment (odds ratio [OR] 0.62, 95% CI 0.46-0.84; <italic>P</italic>=.006) (<xref rid="figure4" ref-type="fig">Figure 4</xref>C). Power analysis for the logistic regression, using 1000 simulations, yielded an 89.7% chance of detecting a significant impact of hippocampal volume on <italic>memory</italic> symptoms, thereby confirming the reliability of these findings. Nonetheless, the volumes of the entorhinal cortex and prefrontal cortex did not show a significant relationship with <italic>memory</italic> symptoms (<italic>P</italic>&#62;.05), but the prefrontal cortex had high SMDs (<xref rid="figure4" ref-type="fig">Figure 4</xref>A).</p>
        <p>In terms of <italic>motor</italic> symptoms, the <italic>motor</italic> model predicted that visit notes with DLB (95.5%) and PD (100%) diagnoses had the highest proportion of <italic>motor</italic> symptoms across visit notes of ADRD diagnoses, which is consistent with our understanding that motor impairment is the primary symptom for patients with DLB and PD [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref5">5</xref>] (<xref rid="figure4" ref-type="fig">Figure 4</xref>D). The MRI analysis of <italic>motor</italic> symptoms revealed that a smaller pallidum size was significantly associated with the presence of motor impairments (OR 0.73, 95% CI 0.58-0.9; <italic>P</italic>=.04) (<xref rid="figure4" ref-type="fig">Figure 4</xref>E). Power analysis for the logistic regression, conducted with 1000 simulations, revealed an 84.7% probability of accurately detecting a significant influence of pallidum volume on motor symptoms, which substantiates the robustness of our results. Other regions related to motor function did not exhibit significant volumetric differences (<italic>P</italic>&#62;.05). Age and sex were accounted for in all analyses. All results were corrected for multiple comparisons [<xref ref-type="bibr" rid="ref30">30</xref>]. Thus, the MRI findings corroborated both <italic>memory</italic> and <italic>motor</italic> symptom predictions made by our 2-tier hierarchical LLM.</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Evaluation of model performance with magnetic resonance imaging brain volume. (A) Summary statistics of the volumes of brain regions associated with memory or motor functions. A standardized mean difference (SMD) threshold of 0.1 has been employed to assess the equilibrium of each metric. Measurements with an SMD exceeding 0.1 (highlighted in bold) signify a comparative lack of balance. (B) Percentage of visit notes with at least one impaired memory symptom predicted by the memory model across visit notes with Alzheimer disease and related dementias (ADRD) diagnosis. The number above each bar represents the number of visit notes in each ADRD diagnosis where impaired memory symptoms were detected. As expected, visit notes with Alzheimer disease (AD) diagnosis had the highest proportion of memory symptoms across all ADRD diagnoses. (C) Coronal view of the brain area associated with memory impairment. Patients with a smaller hippocampus had a higher likelihood of memory impairment (odds ratio [OR] 0.62; <italic>P</italic>=.006). (D) Percentage of visit notes with at least one impaired motor symptom predicted by the motor model across visit notes with ADRD diagnosis. The number above each bar represents the number of visit notes in each ADRD diagnosis where impaired motor symptoms were detected. As expected, visit notes with dementia with Lewy bodies (DLB) and Parkinson disease (PD) diagnoses had the highest proportion of motor symptoms across all ADRD diagnoses. (E) Coronal view of the brain area associated with motor impairment. Patients with a smaller pallidum had a higher likelihood of motor impairment (OR 0.73; <italic>P</italic>=.04). All <italic>P</italic> values have been adjusted for multiple comparisons. FTD: frontotemporal dementia; PPA: primary progressive aphasia; VCI: vascular cognitive impairment. *<italic>P</italic>&#60;.05, **<italic>P</italic>&#60;.01.</p>
          </caption>
          <graphic xlink:href="ai_v4i1e66926_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Error Analysis</title>
        <p>We performed an error analysis to gain insights into the misclassifications made by the 2-tier hierarchical LLM, particularly in its ability to classify symptoms as intact or impaired across the 7 domains. We included both the held-out test set and the MRI validation dataset in our analysis to ensure thoroughness. It is worth mentioning that since the MRI validation dataset does not include true labels, we relied on chart reviews to validate predictions (Figure S1 in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>).</p>
        <p>Our error analysis began by examining instances where the models’ predictions of symptoms across ADRD types did not align with known disease profiles. For example, in AD cases, where memory impairment is a prominent symptom [<xref ref-type="bibr" rid="ref2">2</xref>], the model did not predict <italic>memory</italic> symptoms in 6.1% (23/378) of cases. These instances were notable for their focus on broader cognitive decline or general test scores rather than explicit mentions of <italic>memory</italic> symptoms. In FTD, 93% (62/67) of visit notes referenced <italic>memory</italic> symptoms, which is intriguing since memory impairment is not typical in FTD, particularly in its behavioral variant [<xref ref-type="bibr" rid="ref31">31</xref>]. Manual review confirmed that these symptoms were indeed documented. In VCI, 87% (47/54) of visit notes mentioned <italic>memory</italic> symptoms, consistent with the recognition of memory issues as a feature of VCI [<xref ref-type="bibr" rid="ref32">32</xref>]. The model detected <italic>memory</italic> symptoms in 84% (37/44) of DLB visit notes and 79% (19/24) of PPA cases, which often concerned semantic memory challenges. Another example involves <italic>motor</italic> symptoms. The model showed a small margin of error in DLB cases, failing to detect <italic>motor</italic> symptoms in just 2 cases (2/44, 5%). In AD visit notes, <italic>motor</italic> symptoms were predicted accurately in 76.2% (288/378) of notes. FTD cases showed an 88% (59/67) occurrence of <italic>motor</italic> symptoms, and VCI notes included <italic>motor</italic> symptom references in 92.6% (50/54) of cases, often related to lower body motor challenges. PPA patients were identified with <italic>motor</italic> symptoms in 63% (15/24) of notes, with manual verification confirming the presence of true <italic>motor</italic> symptoms in the majority (11/15, 73%) of these cases.</p>
        <p>The second part of the error analysis investigated visit notes by random sampling, with a focus on notes with high symptom counts (more than 10 symptom predictions). This examination uncovered several types of errors affecting prediction accuracy across all symptoms, including 6 types of false positives: (1) generalizing cognitive function as a symptom, (2) confusing one symptom with another symptom, (3) identifying evaluation or test statements as impairment, (4) misrecognizing intact as impaired, (5) being misled by ambiguous or complex sentences, and (6) confusing medical history with present symptoms. Four types of false negatives were also identified, including (1) overlooking particular expressions, (2) overlooking particular test scores, (3) misrecognizing impaired as intact, and (4) overlooking sentences or phrases that require contextual information. <xref ref-type="table" rid="table4">Table 4</xref> provides a detailed breakdown of these error types and examples from visit notes. Additionally, to understand the distribution of false positives and false negatives across the model’s predictions at the sentence level, we calculated confusion matrices based on the held-out test set for each symptom, and the data are presented in Figure S4 in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Types of errors in model prediction.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="400"/>
            <col width="0"/>
            <col width="570"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Types of errors</td>
                <td>Example (mislabeled category; correct category)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="4">
                  <bold>False positive</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Generalizing cognitive function as a symptom</td>
                <td colspan="2">
                  <list list-type="bullet">
                    <list-item>
                      <p>“problem in cognitive functioning” (mislabeled: impaired memory; correct: no information)</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Confusing one symptom with another symptom</td>
                <td colspan="2">
                  <list list-type="bullet">
                    <list-item>
                      <p>“she began to have trouble sorting items” (mislabeled: impaired memory; correct: impaired executive function)</p>
                    </list-item>
                    <list-item>
                      <p>“cannot remember a word” (mislabeled: impaired motor; correct: impaired memory)</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Identifying evaluation or test statements as impairment</td>
                <td colspan="2">
                  <list list-type="bullet">
                    <list-item>
                      <p>“patient visit for evaluation of memory impairment” (mislabeled: impaired memory; correct: no information)</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Misrecognizing intact as impaired</td>
                <td colspan="2">
                  <list list-type="bullet">
                    <list-item>
                      <p>“Mild wordfinding difficulty has resolved” (mislabeled: impaired language; correct: intact language)</p>
                    </list-item>
                    <list-item>
                      <p>“No disorientation in time” (mislabeled: impaired memory; correct: intact memory)</p>
                    </list-item>
                    <list-item>
                      <p>“Plantar response is flexor bilaterally” (mislabeled: impaired motor; correct: intact motor)</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Being misled by ambiguous or complex sentences</td>
                <td colspan="2">
                  <list list-type="bullet">
                    <list-item>
                      <p>“Speech is fluent but some dysnomia is noted” (mislabeled: intact language; correct: impaired language)</p>
                    </list-item>
                    <list-item>
                      <p>“Long term memory is fine but short term memory is not great” (mislabeled: intact memory; correct: impaired memory)</p>
                    </list-item>
                    <list-item>
                      <p>“Impairment of short-term memory has declined” (mislabeled: intact memory; correct: impaired memory)</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Confusing medical history with present symptoms</td>
                <td colspan="2">
                  <list list-type="bullet">
                    <list-item>
                      <p>“ask about his past falls” (mislabeled: impaired motor; correct: no information)</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>False negative</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Overlooking particular expressions</td>
                <td colspan="2">
                  <list list-type="bullet">
                    <list-item>
                      <p>“repeat the same question over and over again” (mislabeled: no information; correct: impaired memory)</p>
                    </list-item>
                    <list-item>
                      <p>“he puts things away in the wrong place” (mislabeled: no information; correct: impaired memory)</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Overlooking particular test scores</td>
                <td colspan="2">
                  <list list-type="bullet">
                    <list-item>
                      <p>“CDR-SOB memory is 1” (mislabeled: no information; correct: impaired memory)</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Misrecognizing impaired as intact</td>
                <td colspan="2">
                  <list list-type="bullet">
                    <list-item>
                      <p>“oriented partially in time” (mislabeled: intact memory; correct: impaired memory)</p>
                    </list-item>
                    <list-item>
                      <p>“oriented to his wife but has visual agnosia” (mislabeled: intact visuospatial; correct: impaired visuospatial)</p>
                    </list-item>
                    <list-item>
                      <p>“He requires help to dress only for adult undergarments but not for clothes” (mislabeled: intact motor; correct: impaired motor)</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Overlooking sentences or phrases that require contextual information</td>
                <td colspan="2">
                  <list list-type="bullet">
                    <list-item>
                      <p>“memory has been stable for 2 years. He has worsened in the past 5 months” (mislabeled: intact memory; correct: impaired memory)</p>
                    </list-item>
                    <list-item>
                      <p>“Gait: … slow to initiate.” (mislabeled: no information; correct: impaired motor)</p>
                    </list-item>
                  </list>
                </td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <p>In this study, we developed and evaluated an LLM-based 2-tier hierarchical model for automated symptom extraction, which was trained on expert-labeled visit notes from patients with ADRD at the MGH memory clinic. The model classified sentences or phrases into categories of <italic>impaired</italic>, <italic>intact</italic>, or <italic>no information</italic> for 7 ADRD symptoms: <italic>memory</italic>, <italic>executive function</italic>, <italic>motor</italic>, <italic>language</italic>, <italic>visuospatial</italic>, <italic>neuropsychiatric</italic>, and <italic>sleep</italic>. Our method demonstrated superiority over rule-based and keyword-dependent methods [<xref ref-type="bibr" rid="ref7">7</xref>-<xref ref-type="bibr" rid="ref11">11</xref>], which often miss nuanced contextual and semantic relationships. The model achieved robust performance in detecting each symptom from clinical notes, with a micro-averaged AUROC ranging from 0.97 to 0.99. Furthermore, with the implementation of our LLM-based symptom extraction, the AUROC for ADRD differential diagnosis improved substantially (AUROC=0.83) compared to regex-based extraction (AUROC=0.59). Moreover, our model’s predictions aligned with clinical evidence, with most clinical notes correctly matching their respective symptoms. Further, the associations of symptoms with different affected brain regions were substantiated through brain MRI findings. Thus, our model holds potential as a screening tool to streamline diagnosis, improve precision in clinical trials and treatment planning, and enhance our understanding of ADRD subtype heterogeneity.</p>
      <p>Traditional approaches, such as regex-based methods, are highly dependent on predefined sets of keywords or rules. They struggle with variations in how symptoms are expressed. For instance, the phrase “difficulty swallowing” could be documented in various ways, such as “unable to swallow” and “has trouble swallowing,” or with more context-specific expressions like “takes 60 minutes to feed the patient a meal.” It is difficult to build a one-size-fits-all rule for capturing every symptom in each domain. To illustrate these challenges, we created a list of regex patterns for ADRD symptoms (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>) and compared the performance of our LLM-based model with traditional regex techniques. We evaluated both methods using 2 L1-regularized logistic regression models: one based on symptom counts derived from regex patterns, and another using counts from our 2-tier hierarchical LLM. Our results showed that the LLM-based model significantly outperformed the regex-based model, achieving an AUROC of 0.83, compared to an AUROC of 0.59 obtained with the regex-based model. This improvement demonstrates the LLM’s ability to better capture the context of clinical symptoms in ADRD, highlighting the superiority of transformer-based models, like BioBERT, in overcoming the limitations of traditional rule-based approaches. Other researchers have used NLP approaches to determine or extract information from clinical notes as well. For example, Prakash et al [<xref ref-type="bibr" rid="ref33">33</xref>] achieved strong accuracy and <italic>F</italic><sub>1</sub>-scores (83%-92%) for determining the presence of ADRD severity information in clinical notes using rule-based methods. 
Similarly, Chen et al [<xref ref-type="bibr" rid="ref34">34</xref>] developed a rule-based NLP pipeline to extract cognitive test scores and biomarkers from clinical narratives, achieving an <italic>F</italic><sub>1</sub>-score of 0.9059 across 7 different measures. Their focus was on identifying and harmonizing cognitive test scores in severity categories for patients with ADRD. However, these approaches primarily focus on specific cognitive tests and biomarkers, which are typically more straightforward to identify. In contrast, our method focuses on extracting symptoms from sentences across 7 distinct domains. Symptoms are more complex and less structured, requiring a deep understanding of contextual relationships to accurately identify and classify them. Our study verified that the transformer-based BERT model can address this challenge by handling complex medical terminology and capturing the meanings of terms within their context.</p>
      <p>As expected, in ADRD differential diagnosis, <italic>memory</italic> emerged as the most crucial symptom for predicting AD (Figure S3A in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>), <italic>motor</italic> was the most significant symptom for predicting DLB (Figure S3B in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>), and <italic>language</italic> was the most important symptom for predicting PPA (Figure S3E in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>). These findings are consistent with our understanding of the clinical manifestations of these diseases [<xref ref-type="bibr" rid="ref2">2</xref>-<xref ref-type="bibr" rid="ref4">4</xref>].</p>
      <p>While no single disease required all 7 symptoms for prediction (Figure S3 in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>), <italic>executive function</italic> stood out as the most important (for AD, PD, and VCI; see Figures S3A, D, and F in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>) or moderately important (for DLB, FTD, and PPA; see Figures S3B, C, and E in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>) feature across all predictions. Notably, the importance of <italic>executive function</italic> in predicting AD was comparable to that of <italic>memory</italic>. This may be due to the broad range of behaviors associated with <italic>executive function</italic>, such as planning, time management, and working memory, which are intricately woven into the complexity of daily life. Additionally, the frontal lobe, a key hub for <italic>executive function</italic> [<xref ref-type="bibr" rid="ref35">35</xref>], is extensively connected with other brain regions involved in various functions [<xref ref-type="bibr" rid="ref36">36</xref>]. For example, <italic>memory</italic> impairment may impact the hippocampal-prefrontal pathway [<xref ref-type="bibr" rid="ref37">37</xref>], thereby affecting tasks that require both <italic>memory</italic> and <italic>executive function</italic>, such as remembering to take medications at specific times. This pattern also helps explain why, in the case of FTD, a disease characterized by severe behavioral manifestations [<xref ref-type="bibr" rid="ref4">4</xref>] and frontal or temporal lobe degeneration [<xref ref-type="bibr" rid="ref38">38</xref>], <italic>executive function</italic> provides only moderate predictive power. 
Although this might seem counterintuitive given the role of <italic>executive function</italic> in FTD, it may be because the behavioral symptoms in FTD are more prominent, and <italic>executive function</italic> may not have sufficient discriminatory power for a differential diagnosis. Moreover, frontal lobe atrophy in FTD may affect behavior in a manner similar to how disruption in the connection between the frontal lobe and other functional areas impacts executive tasks, thereby influencing the overall predictive value of <italic>executive function</italic> in this context.</p>
      <p>In the context of ADRD differential diagnosis, our model identified <italic>memory</italic> as a moderately important symptom on average for diagnosing ADRD (<xref rid="figure3" ref-type="fig">Figure 3</xref>C). When evaluating prediction performance by specific ADRD diagnoses, <italic>memory</italic> was ranked as the most crucial symptom for predicting AD (Figure S3A in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>); moderately important for FTD (Figure S3C in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>) and VCI (Figure S3F in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>); and least important for DLB (Figure S3B in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>), PD (Figure S3D in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>), and PPA (Figure S3E in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>). This importance ranking for <italic>memory</italic> aligns with existing knowledge about the prevalence of memory impairment across different ADRD diagnoses [<xref ref-type="bibr" rid="ref2">2</xref>-<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref32">32</xref>]. The model generally performed well in identifying <italic>memory</italic> symptoms. However, in some patients with AD, <italic>memory</italic> symptoms were not predicted. Further analysis revealed that this was likely due to follow-up notes simply stating “no change” in the patient’s condition, which did not trigger the model’s detection mechanisms. This suggests a need for improvement in detecting implied or static memory impairments. Additionally, some notes detailed atypical AD presentations, emphasizing language or motor difficulties rather than memory loss, which can indicate variations in clinical presentation among patients with the same underlying etiology. 
Further, an unexpectedly high prevalence of <italic>memory</italic> symptoms in FTD underscores the complexity of symptomatology. While aging has been suggested as a confounding factor for <italic>memory</italic> symptoms in FTD [<xref ref-type="bibr" rid="ref4">4</xref>], our data indicated no significant age difference in patients with and without <italic>memory</italic> symptoms. Meanwhile, some studies have suggested that <italic>memory</italic> symptoms can emerge in patients with progressive FTD, akin to AD presentations [<xref ref-type="bibr" rid="ref39">39</xref>], which may explain our observation. In DLB cases, our model detected <italic>memory</italic> symptoms in many visit notes, with only 1 case later reclassified as AD. Although DLB typically lacks early memory impairment, such symptoms can develop as the condition advances [<xref ref-type="bibr" rid="ref3">3</xref>]. Most evaluated visit notes were from initial visits, suggesting that DLB diagnoses might already be at more advanced stages by then. Further analysis showed that AD cases had more frequent memory-related references than DLB (Wilcoxon rank sum test W=105474; <italic>P</italic>&#60;.001), demonstrating our model’s ability to distinguish patterns of the same symptom across different diagnoses.</p>
      <p>Motor symptoms were the most prevalent impairments among patients with ADRD in our dataset (<xref ref-type="table" rid="table2">Table 2</xref>) and showed moderate importance on average in predicting ADRD diagnoses (<xref rid="figure3" ref-type="fig">Figure 3</xref>C). When evaluating prediction performance by specific ADRD diagnoses, <italic>motor</italic> was ranked as the most crucial symptom for predicting DLB (Figure S3B in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>); moderately important for AD (Figure S3A in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>), PD (Figure S3D in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>), PPA (Figure S3E in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>), and VCI (Figure S3F in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>); and least important for FTD (Figure S3C in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>). This importance ranking for <italic>motor</italic> aligns with existing knowledge about the prevalence of motor impairment in AD, DLB, PPA, and VCI diagnoses [<xref ref-type="bibr" rid="ref2">2</xref>-<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref32">32</xref>]. The low ranking of <italic>motor</italic> in predicting FTD and its moderate ranking for PD were unexpected, considering that these 2 diseases have more behavioral symptoms closely associated with motor function [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>]. This discrepancy might be due to the broad range of <italic>motor</italic> functions involved, making it harder to distinguish nuances between these diseases and others, similar to the case where <italic>executive function</italic> had a moderate contribution in predicting FTD. 
As expected, patients with DLB or PD had the highest occurrences of <italic>motor</italic> symptoms. Notably, 1 patient initially diagnosed with mild cognitive impairment was later found to have DLB, which the model had correctly predicted, underscoring the model’s robustness. FTD cases often exhibited <italic>motor</italic> symptoms, even though their diagnoses did not change to DLB or PD in later visits. This was observed despite excluding <italic>motor</italic> symptom subtypes like corticobasal syndrome or progressive supranuclear palsy [<xref ref-type="bibr" rid="ref4">4</xref>], and no motor neuron diseases were noted. This underscores that motor symptoms can develop in patients with FTD over time, even when they are not diagnosed with conditions typically associated with these symptoms. Moreover, patients with FTD having <italic>motor</italic> symptoms were generally older, aligning with symptom progression, although the age difference was not statistically significant. In patients with AD, the model’s prediction of frequent <italic>motor</italic> symptoms, such as “unsteady stance” and “perseveration of movement” (largely confirmed upon chart review), aligns with literature indicating that late-stage AD can manifest motor impairments [<xref ref-type="bibr" rid="ref2">2</xref>], similar to those seen in DLB or PD [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref5">5</xref>]. This suggests that these patients with AD may be at more advanced stages of the disease. Patients with AD having <italic>motor</italic> symptoms were generally older, which is consistent with the progression hypothesis, though this relationship was not statistically significant. 
The high occurrence of <italic>motor</italic> symptoms in VCI cases (confirmed through manual review), which emphasized sentences or phrases that particularly mention the lower body being affected, such as “gait instability” and “frequent falls,” aligns with clinical knowledge [<xref ref-type="bibr" rid="ref32">32</xref>]. Only 1 predicted VCI case was later diagnosed with DLB, highlighting the model’s specificity for differential diagnosis.</p>
      <p>Among all symptom predictions, <italic>visuospatial</italic> symptoms had the lowest performance (<xref ref-type="table" rid="table3">Table 3</xref>). Further review revealed that certain behaviors might reflect mixed symptoms in patients’ clinical presentations. For example, “unable to drive” in clinical notes could be due to impaired navigation ability [<xref ref-type="bibr" rid="ref40">40</xref>-<xref ref-type="bibr" rid="ref42">42</xref>], typically categorized as a <italic>visuospatial</italic> symptom, but driving is a complex behavior that also involves <italic>executive function</italic> for planning the route [<xref ref-type="bibr" rid="ref43">43</xref>], <italic>memory</italic> for remembering place names [<xref ref-type="bibr" rid="ref43">43</xref>], and <italic>motor</italic> skills for physical control [<xref ref-type="bibr" rid="ref43">43</xref>]. Therefore, developing more refined models that can better distinguish and specifically target <italic>visuospatial</italic> symptoms will be essential for improving the accuracy of symptom extraction.</p>
      <p>This study has several limitations. While our current NLP techniques proved to be effective in symptom extraction, the model performance is still susceptible to diverse clinical narratives and abbreviations. For example, we tailored data preprocessing templates for each provider, which makes it challenging to generalize the model to different health care settings. Additionally, our study focused on patients with a single ADRD diagnosis, yet many patients fall into the dementia unspecified category due to mixed dementia. For instance, autopsy studies revealed that patients with pure VCI were less common than those with mixed dementia [<xref ref-type="bibr" rid="ref44">44</xref>], which often co-occurred with AD pathology [<xref ref-type="bibr" rid="ref45">45</xref>] and complicated the diagnostic process. Finally, our method is primarily intended for research use, and several challenges, such as data privacy, clinician–artificial intelligence interaction, and model performance, need to be overcome before it is ready for clinical decision-making.</p>
      <p>Future studies should include patients with multiple ADRD diagnoses and at different disease stages to better reflect real-world complexities. Enhancements might include more sophisticated language parsing and the integration of clinical criteria for improved specificity. Moreover, integrating structured patient data, such as demographics and neurological tests, could enhance the model’s precision and generalizability. Recent studies, such as the study by Xue et al [<xref ref-type="bibr" rid="ref46">46</xref>], have shown the potential of transformer-based models for multi-modal differential diagnosis of dementia, suggesting avenues for further refinement of our approach. Furthermore, the dataset generated through our efforts provides a foundation for successive cycles of the active learning loop, having the potential to continually refine and elevate the model’s performance over time. Future research should leverage this dataset to further improve model performance and explore avenues for expanding the scope of symptom extraction in diverse clinical scenarios.</p>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Alzheimer disease and related dementias diagnosis list.</p>
        <media xlink:href="ai_v4i1e66926_app1.xlsx" xlink:title="XLSX File  (Microsoft Excel File), 42 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Symptom domain and examples.</p>
        <media xlink:href="ai_v4i1e66926_app2.xlsx" xlink:title="XLSX File  (Microsoft Excel File), 30 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Alzheimer disease and related dementias symptom regular expression list.</p>
        <media xlink:href="ai_v4i1e66926_app3.xlsx" xlink:title="XLSX File  (Microsoft Excel File), 16 KB"/>
      </supplementary-material>
      <supplementary-material id="app4">
        <label>Multimedia Appendix 4</label>
        <p>Supplementary data to support the findings.</p>
        <media xlink:href="ai_v4i1e66926_app4.docx" xlink:title="DOCX File , 1548 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AD</term>
          <def>
            <p>Alzheimer disease</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">ADRD</term>
          <def>
            <p>Alzheimer disease and related dementias</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">AUROC</term>
          <def>
            <p>area under the receiver operating characteristic curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">DLB</term>
          <def>
            <p>dementia with Lewy bodies</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">FTD</term>
          <def>
            <p>frontotemporal dementia</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">LLM</term>
          <def>
            <p>large language model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">MGH</term>
          <def>
            <p>Massachusetts General Hospital</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">MRI</term>
          <def>
            <p>magnetic resonance imaging</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">PD</term>
          <def>
            <p>Parkinson disease</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">PPA</term>
          <def>
            <p>primary progressive aphasia</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">SMD</term>
          <def>
            <p>standardized mean difference</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">VCI</term>
          <def>
            <p>vascular cognitive impairment</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This work was supported by the National Institute on Aging (grant number: P30AG062421), the National Institutes of Health (grant numbers: R56AG082698 and R01AG082698), and the Massachusetts Life Sciences Center funding for data science internships. We thank Yu Leng for proofreading <xref rid="figure1" ref-type="fig">Figure 1</xref>.</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>YC contributed to data curation, formal analysis, investigation, methodology, software, visualization, writing – original draft, and writing – review and editing. MM contributed to data curation, formal analysis, investigation, methodology, software, visualization, and writing – original draft. YH contributed to software, formal analysis, visualization, and writing – review and editing. AB contributed to data curation and formal analysis. CM contributed to conceptualization and writing – review and editing. BW contributed to writing – review and editing. AS contributed to software. SSM contributed to writing – review and editing. JD contributed to data curation and writing – review and editing. SD contributed to conceptualization, funding acquisition, investigation, methodology, supervision, and writing – review and editing.</p>
      </fn>
      <fn fn-type="conflict">
        <p>BW was supported by grants from the National Institutes of Health (RF1AG064312, RF1NS120947, R01AG073410, R01HL161253, R01NS126282, R01AG073598, R01NS131347, R01NS130119). BW is a co-founder of, scientific advisor and consultant to, and holds a personal equity interest in Beacon Biosignals. SSM receives consultant fees from Nav Health and owns less than 1% of Gilead stock. JD served on a scientific review board for I-Mab Biopharma. The other authors declare no conflicts of interest.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="web">
          <article-title>Alzheimer's Disease Facts and Figures</article-title>
          <source>Alzheimer's Association</source>
          <access-date>2025-05-13</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.alz.org/alzheimers-dementia/facts-figures">https://www.alz.org/alzheimers-dementia/facts-figures</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wolk</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Dickerson</surname>
              <given-names>BC</given-names>
            </name>
          </person-group>
          <article-title>Clinical features and diagnosis of Alzheimer disease</article-title>
          <source>UpToDate</source>
          <access-date>2025-05-13</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.uptodate.com/contents/clinical-features-and-diagnosis-of-alzheimer-disease">https://www.uptodate.com/contents/clinical-features-and-diagnosis-of-alzheimer-disease</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McFarland</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Clinical features and diagnosis of dementia with Lewy bodies</article-title>
          <source>UpToDate</source>
          <access-date>2025-05-13</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.uptodate.com/contents/clinical-features-and-diagnosis-of-dementia-with-lewy-bodies">https://www.uptodate.com/contents/clinical-features-and-diagnosis-of-dementia-with-lewy-bodies</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>SE</given-names>
            </name>
          </person-group>
          <article-title>Frontotemporal dementia: Clinical features and diagnosis</article-title>
          <source>UpToDate</source>
          <access-date>2025-05-13</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.uptodate.com/contents/frontotemporal-dementia-clinical-features-and-diagnosis">https://www.uptodate.com/contents/frontotemporal-dementia-clinical-features-and-diagnosis</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rodnitzky</surname>
              <given-names>RL</given-names>
            </name>
          </person-group>
          <article-title>Cognitive impairment and dementia in Parkinson disease</article-title>
          <source>UpToDate</source>
          <access-date>2025-05-13</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.uptodate.com/contents/cognitive-impairment-and-dementia-in-parkinson-disease">https://www.uptodate.com/contents/cognitive-impairment-and-dementia-in-parkinson-disease</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Koleck</surname>
              <given-names>TA</given-names>
            </name>
            <name name-style="western">
              <surname>Dreisbach</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Bourne</surname>
              <given-names>PE</given-names>
            </name>
            <name name-style="western">
              <surname>Bakken</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing of symptoms documented in free-text narratives of electronic health records: a systematic review</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2019</year>
          <month>04</month>
          <day>01</day>
          <volume>26</volume>
          <issue>4</issue>
          <fpage>364</fpage>
          <lpage>379</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/30726935"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocy173</pub-id>
          <pub-id pub-id-type="medline">30726935</pub-id>
          <pub-id pub-id-type="pii">5307912</pub-id>
          <pub-id pub-id-type="pmcid">PMC6657282</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Savova</surname>
              <given-names>GK</given-names>
            </name>
            <name name-style="western">
              <surname>Masanz</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ogren</surname>
              <given-names>PV</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sohn</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kipper-Schuler</surname>
              <given-names>KC</given-names>
            </name>
            <name name-style="western">
              <surname>Chute</surname>
              <given-names>CG</given-names>
            </name>
          </person-group>
          <article-title>Mayo clinical Text Analysis and Knowledge Extraction System (cTAKES): architecture, component evaluation and applications</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2010</year>
          <volume>17</volume>
          <issue>5</issue>
          <fpage>507</fpage>
          <lpage>13</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/20819853"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/jamia.2009.001560</pub-id>
          <pub-id pub-id-type="medline">20819853</pub-id>
          <pub-id pub-id-type="pii">17/5/507</pub-id>
          <pub-id pub-id-type="pmcid">PMC2995668</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Aronson</surname>
              <given-names>AR</given-names>
            </name>
          </person-group>
          <article-title>Effective mapping of biomedical text to the UMLS Metathesaurus: the MetaMap program</article-title>
          <source>Proc AMIA Symp</source>
          <year>2001</year>
          <fpage>17</fpage>
          <lpage>21</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/11825149"/>
          </comment>
          <pub-id pub-id-type="medline">11825149</pub-id>
          <pub-id pub-id-type="pii">D010001275</pub-id>
          <pub-id pub-id-type="pmcid">PMC2243666</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vijayakrishnan</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Steinhubl</surname>
              <given-names>SR</given-names>
            </name>
            <name name-style="western">
              <surname>Ng</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Byrd</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Daar</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>BA</given-names>
            </name>
            <name name-style="western">
              <surname>deFilippi</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ebadollahi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>WF</given-names>
            </name>
          </person-group>
          <article-title>Prevalence of heart failure signs and symptoms in a large primary care population identified through the use of text and data mining of the electronic health record</article-title>
          <source>J Card Fail</source>
          <year>2014</year>
          <month>07</month>
          <volume>20</volume>
          <issue>7</issue>
          <fpage>459</fpage>
          <lpage>64</lpage>
          <pub-id pub-id-type="doi">10.1016/j.cardfail.2014.03.008</pub-id>
          <pub-id pub-id-type="medline">24709663</pub-id>
          <pub-id pub-id-type="pii">S1071-9164(14)00134-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC4083004</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jackson</surname>
              <given-names>RG</given-names>
            </name>
            <name name-style="western">
              <surname>Patel</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Jayatilleke</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Kolliakou</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ball</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gorrell</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Dobson</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing to extract symptoms of severe mental illness from clinical text: the Clinical Record Interactive Search Comprehensive Data Extraction (CRIS-CODE) project</article-title>
          <source>BMJ Open</source>
          <year>2017</year>
          <month>01</month>
          <day>17</day>
          <volume>7</volume>
          <issue>1</issue>
          <fpage>e012012</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmjopen.bmj.com/lookup/pmidlookup?view=long&#38;pmid=28096249"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmjopen-2016-012012</pub-id>
          <pub-id pub-id-type="medline">28096249</pub-id>
          <pub-id pub-id-type="pii">bmjopen-2016-012012</pub-id>
          <pub-id pub-id-type="pmcid">PMC5253558</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Forsyth</surname>
              <given-names>AW</given-names>
            </name>
            <name name-style="western">
              <surname>Barzilay</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Hughes</surname>
              <given-names>KS</given-names>
            </name>
            <name name-style="western">
              <surname>Lui</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Lorenz</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Enzinger</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Tulsky</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Lindvall</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Machine learning methods to extract documentation of breast cancer symptoms from electronic health records</article-title>
          <source>J Pain Symptom Manage</source>
          <year>2018</year>
          <month>06</month>
          <volume>55</volume>
          <issue>6</issue>
          <fpage>1492</fpage>
          <lpage>1499</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0885-3924(18)30082-4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jpainsymman.2018.02.016</pub-id>
          <pub-id pub-id-type="medline">29496537</pub-id>
          <pub-id pub-id-type="pii">S0885-3924(18)30082-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>MW</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Toutanova</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding</article-title>
          <year>2019</year>
          <conf-name>Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</conf-name>
          <conf-date>June 2-7, 2019</conf-date>
          <conf-loc>Minneapolis, MN</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/N19-1423.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alsentzer</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Boag</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Weng</surname>
              <given-names>WH</given-names>
            </name>
            <name name-style="western">
              <surname>Jindi</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Naumann</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>McDermott</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Publicly Available Clinical BERT Embeddings</article-title>
          <year>2019</year>
          <conf-name>2nd Clinical Natural Language Processing Workshop</conf-name>
          <conf-date>June 7, 2019</conf-date>
          <conf-loc>Minneapolis, MN</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/W19-1909.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>OpenAI</collab>
            <name name-style="western">
              <surname>Achiam</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Adler</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Agarwal</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ahmad</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Akkaya</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Aleman</surname>
              <given-names>FL</given-names>
            </name>
            <name name-style="western">
              <surname>Almeida</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>GPT-4 Technical Report</article-title>
          <source>arXiv</source>
          <year>2023</year>
          <access-date>2025-05-13</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2303.08774">https://arxiv.org/abs/2303.08774</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vaswani</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shazeer</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Parmar</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Uszkoreit</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Gomez</surname>
              <given-names>AN</given-names>
            </name>
            <name name-style="western">
              <surname>Kaiser</surname>
              <given-names>Ł</given-names>
            </name>
            <name name-style="western">
              <surname>Polosukhin</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Attention is all you need</article-title>
          <source>NIPS'17: Proceedings of the 31st International Conference on Neural Information Processing Systems</source>
          <year>2017</year>
          <conf-name>31st International Conference on Neural Information Processing Systems</conf-name>
          <conf-date>December 4-9, 2017</conf-date>
          <conf-loc>Long Beach, CA</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tavabi</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Raza</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Golchin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Hogue</surname>
              <given-names>GD</given-names>
            </name>
            <name name-style="western">
              <surname>Kiapour</surname>
              <given-names>AM</given-names>
            </name>
          </person-group>
          <article-title>Disparities in cannabis use and documentation in electronic health records among children and young adults</article-title>
          <source>NPJ Digit Med</source>
          <year>2023</year>
          <month>08</month>
          <day>08</day>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>138</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41746-023-00885-w"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-023-00885-w</pub-id>
          <pub-id pub-id-type="medline">37553423</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41746-023-00885-w</pub-id>
          <pub-id pub-id-type="pmcid">PMC10409778</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Gandhi</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Storey</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>A deep language model for symptom extraction from clinical text and its application to extract COVID-19 symptoms from social media</article-title>
          <source>IEEE J Biomed Health Inform</source>
          <year>2022</year>
          <month>04</month>
          <volume>26</volume>
          <issue>4</issue>
          <fpage>1737</fpage>
          <lpage>1748</lpage>
          <pub-id pub-id-type="doi">10.1109/jbhi.2021.3123192</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Altosaar</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ranganath</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>ClinicalBERT: Modeling Clinical Notes and Predicting Hospital Readmission</article-title>
          <source>arXiv</source>
          <year>2019</year>
          <access-date>2025-05-13</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1904.05342v3">https://arxiv.org/abs/1904.05342v3</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Eyre</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Chapman</surname>
              <given-names>AB</given-names>
            </name>
            <name name-style="western">
              <surname>Peterson</surname>
              <given-names>KS</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Alba</surname>
              <given-names>PR</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Box</surname>
              <given-names>TL</given-names>
            </name>
            <name name-style="western">
              <surname>DuVall</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Patterson</surname>
              <given-names>OV</given-names>
            </name>
          </person-group>
          <article-title>Launching into clinical space with medspaCy: a new clinical text processing toolkit in Python</article-title>
          <source>arXiv</source>
          <year>2021</year>
          <access-date>2025-05-13</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2106.07799">https://arxiv.org/abs/2106.07799</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yoon</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>So</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>BioBERT: a pre-trained biomedical language representation model for biomedical text mining</article-title>
          <source>Bioinformatics</source>
          <year>2020</year>
          <month>02</month>
          <day>15</day>
          <volume>36</volume>
          <issue>4</issue>
          <fpage>1234</fpage>
          <lpage>1240</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/31501885"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bioinformatics/btz682</pub-id>
          <pub-id pub-id-type="medline">31501885</pub-id>
          <pub-id pub-id-type="pii">5566506</pub-id>
          <pub-id pub-id-type="pmcid">PMC7703786</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wolf</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Debut</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Sanh</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Chaumond</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Delangue</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Moi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cistac</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Rault</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Louf</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Funtowicz</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Davison</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Shleifer</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>von Platen</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Jernite</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Plu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Scao</surname>
              <given-names>TL</given-names>
            </name>
            <name name-style="western">
              <surname>Gugger</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Drame</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lhoest</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Rush</surname>
              <given-names>AM</given-names>
            </name>
          </person-group>
          <article-title>Transformers: State-of-the-Art Natural Language Processing</article-title>
          <year>2020</year>
          <conf-name>Conference on Empirical Methods in Natural Language Processing: System Demonstrations</conf-name>
          <conf-date>November 16-20, 2020</conf-date>
          <conf-loc>Online</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2020.emnlp-demos.6.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Billot</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Magdamo</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Arnold</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Das</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Iglesias</surname>
              <given-names>JE</given-names>
            </name>
          </person-group>
          <article-title>Robust machine learning segmentation for large-scale analysis of heterogeneous clinical brain MRI datasets</article-title>
          <source>Proc Natl Acad Sci USA</source>
          <year>2023</year>
          <month>02</month>
          <day>21</day>
          <volume>120</volume>
          <issue>9</issue>
          <fpage>e2216399120</fpage>
          <pub-id pub-id-type="doi">10.1073/pnas.2216399120</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Peters</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Collette</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Degueldre</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Sterpenich</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Majerus</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Salmon</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>The neural correlates of verbal short-term memory in Alzheimer's disease: an fMRI study</article-title>
          <source>Brain</source>
          <year>2009</year>
          <month>07</month>
          <day>11</day>
          <volume>132</volume>
          <issue>Pt 7</issue>
          <fpage>1833</fpage>
          <lpage>1846</lpage>
          <pub-id pub-id-type="doi">10.1093/brain/awp075</pub-id>
          <pub-id pub-id-type="medline">19433442</pub-id>
          <pub-id pub-id-type="pii">awp075</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zola-Morgan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Squire</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Amaral</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Human amnesia and the medial temporal region: enduring memory impairment following a bilateral lesion limited to field CA1 of the hippocampus</article-title>
          <source>J Neurosci</source>
          <year>1986</year>
          <month>10</month>
          <day>01</day>
          <volume>6</volume>
          <issue>10</issue>
          <fpage>2950</fpage>
          <lpage>2967</lpage>
          <pub-id pub-id-type="doi">10.1523/jneurosci.06-10-02950.1986</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Scoville</surname>
              <given-names>WB</given-names>
            </name>
            <name name-style="western">
              <surname>Milner</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Loss of recent memory after bilateral hippocampal lesions</article-title>
          <source>J Neurol Neurosurg Psychiatry</source>
          <year>1957</year>
          <volume>20</volume>
          <issue>1</issue>
          <fpage>11</fpage>
          <lpage>21</lpage>
          <pub-id pub-id-type="doi">10.1136/jnnp.20.1.11</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Geyer</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Matelli</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Luppino</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Zilles</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Functional neuroanatomy of the primate isocortical motor system</article-title>
          <source>Anat Embryol (Berl)</source>
          <year>2000</year>
          <month>12</month>
          <day>20</day>
          <volume>202</volume>
          <issue>6</issue>
          <fpage>443</fpage>
          <lpage>474</lpage>
          <pub-id pub-id-type="doi">10.1007/s004290000127</pub-id>
          <pub-id pub-id-type="medline">11131014</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Woolsey</surname>
              <given-names>CN</given-names>
            </name>
            <name name-style="western">
              <surname>Settlage</surname>
              <given-names>PH</given-names>
            </name>
            <name name-style="western">
              <surname>Meyer</surname>
              <given-names>DR</given-names>
            </name>
            <name name-style="western">
              <surname>Sencer</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Pinto Hamuy</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Travis</surname>
              <given-names>AM</given-names>
            </name>
          </person-group>
          <article-title>Patterns of localization in precentral and "supplementary" motor areas and their relation to the concept of a premotor area</article-title>
          <source>Res Publ Assoc Res Nerv Ment Dis</source>
          <year>1952</year>
          <volume>30</volume>
          <fpage>238</fpage>
          <lpage>264</lpage>
          <pub-id pub-id-type="medline">12983675</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Groenewegen</surname>
              <given-names>HJ</given-names>
            </name>
          </person-group>
          <article-title>The basal ganglia and motor control</article-title>
          <source>Neural Plasticity</source>
          <year>2003</year>
          <month>01</month>
          <volume>10</volume>
          <issue>1-2</issue>
          <fpage>107</fpage>
          <lpage>120</lpage>
          <pub-id pub-id-type="doi">10.1155/np.2003.107</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>De Zeeuw</surname>
              <given-names>CI</given-names>
            </name>
            <name name-style="western">
              <surname>Ten Brinke</surname>
              <given-names>MM</given-names>
            </name>
          </person-group>
          <article-title>Motor learning and the cerebellum</article-title>
          <source>Cold Spring Harb Perspect Biol</source>
          <year>2015</year>
          <month>09</month>
          <day>01</day>
          <volume>7</volume>
          <issue>9</issue>
          <fpage>a021683</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/26330521"/>
          </comment>
          <pub-id pub-id-type="doi">10.1101/cshperspect.a021683</pub-id>
          <pub-id pub-id-type="medline">26330521</pub-id>
          <pub-id pub-id-type="pii">7/9/a021683</pub-id>
          <pub-id pub-id-type="pmcid">PMC4563713</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Benjamini</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hochberg</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Controlling the false discovery rate: a practical and powerful approach to multiple testing</article-title>
          <source>Journal of the Royal Statistical Society: Series B (Methodological)</source>
          <year>1995</year>
          <volume>57</volume>
          <issue>1</issue>
          <fpage>289</fpage>
          <lpage>300</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://rss.onlinelibrary.wiley.com/doi/10.1111/j.2517-6161.1995.tb02031.x"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/j.2517-6161.1995.tb02031.x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rascovsky</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Hodges</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Knopman</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Mendez</surname>
              <given-names>MF</given-names>
            </name>
            <name name-style="western">
              <surname>Kramer</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Neuhaus</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>van Swieten</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Seelaar</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Dopper</surname>
              <given-names>EGP</given-names>
            </name>
            <name name-style="western">
              <surname>Onyike</surname>
              <given-names>CU</given-names>
            </name>
            <name name-style="western">
              <surname>Hillis</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Josephs</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Boeve</surname>
              <given-names>BF</given-names>
            </name>
            <name name-style="western">
              <surname>Kertesz</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Seeley</surname>
              <given-names>WW</given-names>
            </name>
            <name name-style="western">
              <surname>Rankin</surname>
              <given-names>KP</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>JK</given-names>
            </name>
            <name name-style="western">
              <surname>Gorno-Tempini</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Rosen</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Prioleau-Latham</surname>
              <given-names>CE</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kipps</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Lillo</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Piguet</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Rohrer</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Rossor</surname>
              <given-names>MN</given-names>
            </name>
            <name name-style="western">
              <surname>Warren</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Fox</surname>
              <given-names>NC</given-names>
            </name>
            <name name-style="western">
              <surname>Galasko</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Salmon</surname>
              <given-names>DP</given-names>
            </name>
            <name name-style="western">
              <surname>Black</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Mesulam</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Weintraub</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Dickerson</surname>
              <given-names>BC</given-names>
            </name>
            <name name-style="western">
              <surname>Diehl-Schmid</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Pasquier</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Deramecourt</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Lebert</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Pijnenburg</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Chow</surname>
              <given-names>TW</given-names>
            </name>
            <name name-style="western">
              <surname>Manes</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Grafman</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cappa</surname>
              <given-names>SF</given-names>
            </name>
            <name name-style="western">
              <surname>Freedman</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Grossman</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>BL</given-names>
            </name>
          </person-group>
          <article-title>Sensitivity of revised diagnostic criteria for the behavioural variant of frontotemporal dementia</article-title>
          <source>Brain</source>
          <year>2011</year>
          <month>09</month>
          <volume>134</volume>
          <issue>Pt 9</issue>
          <fpage>2456</fpage>
          <lpage>77</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/21810890"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/brain/awr179</pub-id>
          <pub-id pub-id-type="medline">21810890</pub-id>
          <pub-id pub-id-type="pii">awr179</pub-id>
          <pub-id pub-id-type="pmcid">PMC3170532</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>EE</given-names>
            </name>
            <name name-style="western">
              <surname>Wright</surname>
              <given-names>CB</given-names>
            </name>
          </person-group>
          <article-title>Etiology, clinical manifestations, and diagnosis of vascular dementia</article-title>
          <source>UpToDate</source>
          <access-date>2025-05-13</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.uptodate.com/contents/etiology-clinical-manifestations-and-diagnosis-of-vascular-dementia">https://www.uptodate.com/contents/etiology-clinical-manifestations-and-diagnosis-of-vascular-dementia</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Prakash</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Dupre</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Østbye</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Extracting critical information from unstructured clinicians' notes data to identify dementia severity using a rule-based approach: feasibility study</article-title>
          <source>JMIR Aging</source>
          <year>2024</year>
          <month>09</month>
          <day>24</day>
          <volume>7</volume>
          <fpage>e57926</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aging.jmir.org/2024/1/e57926/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/57926</pub-id>
          <pub-id pub-id-type="medline">39316421</pub-id>
          <pub-id pub-id-type="pii">v7i1e57926</pub-id>
          <pub-id pub-id-type="pmcid">PMC11462099</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Prosperi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>DeKosky</surname>
              <given-names>ST</given-names>
            </name>
            <name name-style="western">
              <surname>Farrer</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Bian</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Assess the documentation of cognitive tests and biomarkers in electronic health records via natural language processing for Alzheimer's disease and related dementias</article-title>
          <source>Int J Med Inform</source>
          <year>2023</year>
          <month>02</month>
          <volume>170</volume>
          <fpage>104973</fpage>
          <pub-id pub-id-type="doi">10.1016/j.ijmedinf.2022.104973</pub-id>
          <pub-id pub-id-type="medline">36577203</pub-id>
          <pub-id pub-id-type="pii">S1386-5056(22)00287-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC11325083</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alvarez</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Emory</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Executive function and the frontal lobes: a meta-analytic review</article-title>
          <source>Neuropsychol Rev</source>
          <year>2006</year>
          <month>03</month>
          <day>01</day>
          <volume>16</volume>
          <issue>1</issue>
          <fpage>17</fpage>
          <lpage>42</lpage>
          <pub-id pub-id-type="doi">10.1007/s11065-006-9002-x</pub-id>
          <pub-id pub-id-type="medline">16794878</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fuster</surname>
              <given-names>JM</given-names>
            </name>
          </person-group>
          <article-title>Frontal lobe and cognitive development</article-title>
          <source>J Neurocytol</source>
          <year>2002</year>
          <volume>31</volume>
          <issue>3-5</issue>
          <fpage>373</fpage>
          <lpage>85</lpage>
          <pub-id pub-id-type="doi">10.1023/a:1024190429920</pub-id>
          <pub-id pub-id-type="medline">12815254</pub-id>
          <pub-id pub-id-type="pii">5123790</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Thierry</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gioanni</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Dégénétais</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Glowinski</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Hippocampo-prefrontal cortex pathway: anatomical and electrophysiological characteristics</article-title>
          <source>Hippocampus</source>
          <year>2000</year>
          <volume>10</volume>
          <issue>4</issue>
          <fpage>411</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1002/1098-1063(2000)10:4&lt;411::AID-HIPO7&gt;3.0.CO;2-A</pub-id>
          <pub-id pub-id-type="medline">10985280</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>SE</given-names>
            </name>
          </person-group>
          <article-title>Frontotemporal dementia: Epidemiology, pathology, and pathogenesis</article-title>
          <source>UpToDate</source>
          <access-date>2025-05-13</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.uptodate.com/contents/frontotemporal-dementia-epidemiology-pathology-and-pathogenesis">https://www.uptodate.com/contents/frontotemporal-dementia-epidemiology-pathology-and-pathogenesis</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mormont</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Laurier-Grymonprez</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Baisset-Mouly</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Pasquier</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>[The profile of memory disturbance in early Lewy body dementia differs from that in Alzheimer's disease]</article-title>
          <source>Rev Neurol (Paris)</source>
          <year>2003</year>
          <month>09</month>
          <volume>159</volume>
          <issue>8-9</issue>
          <fpage>762</fpage>
          <lpage>6</lpage>
          <pub-id pub-id-type="medline">13679718</pub-id>
          <pub-id pub-id-type="pii">MDOI-RN-09-2003-159-8-9-0035-3787-101019-ART4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Uc</surname>
              <given-names>EY</given-names>
            </name>
            <name name-style="western">
              <surname>Rizzo</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Anderson</surname>
              <given-names>SW</given-names>
            </name>
            <name name-style="western">
              <surname>Sparks</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Rodnitzky</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Dawson</surname>
              <given-names>JD</given-names>
            </name>
          </person-group>
          <article-title>Impaired navigation in drivers with Parkinson's disease</article-title>
          <source>Brain</source>
          <year>2007</year>
          <month>09</month>
          <day>01</day>
          <volume>130</volume>
          <issue>Pt 9</issue>
          <fpage>2433</fpage>
          <lpage>40</lpage>
          <pub-id pub-id-type="doi">10.1093/brain/awm178</pub-id>
          <pub-id pub-id-type="medline">17686809</pub-id>
          <pub-id pub-id-type="pii">awm178</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mathias</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Lucas</surname>
              <given-names>LK</given-names>
            </name>
          </person-group>
          <article-title>Cognitive predictors of unsafe driving in older drivers: a meta-analysis</article-title>
          <source>Int Psychogeriatr</source>
          <year>2009</year>
          <month>08</month>
          <day>01</day>
          <volume>21</volume>
          <issue>4</issue>
          <fpage>637</fpage>
          <lpage>653</lpage>
          <pub-id pub-id-type="doi">10.1017/s1041610209009119</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Maguire</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>Nannery</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Spiers</surname>
              <given-names>HJ</given-names>
            </name>
          </person-group>
          <article-title>Navigation around London by a taxi driver with bilateral hippocampal lesions</article-title>
          <source>Brain</source>
          <year>2006</year>
          <month>11</month>
          <day>29</day>
          <volume>129</volume>
          <issue>Pt 11</issue>
          <fpage>2894</fpage>
          <lpage>907</lpage>
          <pub-id pub-id-type="doi">10.1093/brain/awl286</pub-id>
          <pub-id pub-id-type="medline">17071921</pub-id>
          <pub-id pub-id-type="pii">129/11/2894</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Anstey</surname>
              <given-names>KJ</given-names>
            </name>
            <name name-style="western">
              <surname>Wood</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lord</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Walker</surname>
              <given-names>JG</given-names>
            </name>
          </person-group>
          <article-title>Cognitive, sensory and physical factors enabling driving safety in older adults</article-title>
          <source>Clin Psychol Rev</source>
          <year>2005</year>
          <month>01</month>
          <volume>25</volume>
          <issue>1</issue>
          <fpage>45</fpage>
          <lpage>65</lpage>
          <pub-id pub-id-type="doi">10.1016/j.cpr.2004.07.008</pub-id>
          <pub-id pub-id-type="medline">15596080</pub-id>
          <pub-id pub-id-type="pii">S0272-7358(04)00134-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schneider</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Arvanitakis</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Bang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Bennett</surname>
              <given-names>DA</given-names>
            </name>
          </person-group>
          <article-title>Mixed brain pathologies account for most dementia cases in community-dwelling older persons</article-title>
          <source>Neurology</source>
          <year>2007</year>
          <month>12</month>
          <day>11</day>
          <volume>69</volume>
          <issue>24</issue>
          <fpage>2197</fpage>
          <lpage>204</lpage>
          <pub-id pub-id-type="doi">10.1212/01.wnl.0000271090.28148.24</pub-id>
          <pub-id pub-id-type="medline">17568013</pub-id>
          <pub-id pub-id-type="pii">01.wnl.0000271090.28148.24</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stampfer</surname>
              <given-names>MJ</given-names>
            </name>
          </person-group>
          <article-title>Cardiovascular disease and Alzheimer's disease: common links</article-title>
          <source>J Intern Med</source>
          <year>2006</year>
          <month>09</month>
          <day>26</day>
          <volume>260</volume>
          <issue>3</issue>
          <fpage>211</fpage>
          <lpage>23</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://onlinelibrary.wiley.com/doi/10.1111/j.1365-2796.2006.01687.x"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/j.1365-2796.2006.01687.x</pub-id>
          <pub-id pub-id-type="medline">16918818</pub-id>
          <pub-id pub-id-type="pii">JIM1687</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xue</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kowshik</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Lteif</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Puducheri</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Jasodanand</surname>
              <given-names>VH</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>OT</given-names>
            </name>
            <name name-style="western">
              <surname>Walia</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Guney</surname>
              <given-names>OB</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Poésy</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kaliaev</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Andreu-Arasa</surname>
              <given-names>VC</given-names>
            </name>
            <name name-style="western">
              <surname>Dwyer</surname>
              <given-names>BC</given-names>
            </name>
            <name name-style="western">
              <surname>Farris</surname>
              <given-names>CW</given-names>
            </name>
            <name name-style="western">
              <surname>Hao</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Kedar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Mian</surname>
              <given-names>AZ</given-names>
            </name>
            <name name-style="western">
              <surname>Murman</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>O'Shea</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Paul</surname>
              <given-names>AB</given-names>
            </name>
            <name name-style="western">
              <surname>Rohatgi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Saint-Hilaire</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sartor</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>Setty</surname>
              <given-names>BN</given-names>
            </name>
            <name name-style="western">
              <surname>Small</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Swaminathan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Taraschenko</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Yuan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Karjadi</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Alvin Ang</surname>
              <given-names>TF</given-names>
            </name>
            <name name-style="western">
              <surname>Bargal</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Plummer</surname>
              <given-names>BA</given-names>
            </name>
            <name name-style="western">
              <surname>Poston</surname>
              <given-names>KL</given-names>
            </name>
            <name name-style="western">
              <surname>Ahangaran</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Au</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Kolachalama</surname>
              <given-names>VB</given-names>
            </name>
          </person-group>
          <article-title>AI-based differential diagnosis of dementia etiologies on multimodal data</article-title>
          <source>Nat Med</source>
          <year>2024</year>
          <month>10</month>
          <day>04</day>
          <volume>30</volume>
          <issue>10</issue>
          <fpage>2977</fpage>
          <lpage>2989</lpage>
          <pub-id pub-id-type="doi">10.1038/s41591-024-03118-z</pub-id>
          <pub-id pub-id-type="medline">38965435</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41591-024-03118-z</pub-id>
          <pub-id pub-id-type="pmcid">PMC11485262</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
