<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="review-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR AI</journal-id><journal-id journal-id-type="publisher-id">ai</journal-id><journal-id journal-id-type="index">41</journal-id><journal-title>JMIR AI</journal-title><abbrev-journal-title>JMIR AI</abbrev-journal-title><issn pub-type="epub">2817-1705</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v5i1e73481</article-id><article-id pub-id-type="doi">10.2196/73481</article-id><article-categories><subj-group subj-group-type="heading"><subject>Review</subject></subj-group></article-categories><title-group><article-title>Natural Language Processing of Clinical Notes for Cancer Research and Patient Care Prior to Widespread Adoption of Generative AI: Scoping Review</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes" equal-contrib="yes"><name name-style="western"><surname>Kayira</surname><given-names>Alfred B</given-names></name><degrees>MSc, MPH, MRES</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Elyazori</surname><given-names>Hadeel R A</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Lybarger</surname><given-names>Kevin</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib 
contrib-type="author"><name name-style="western"><surname>Walter</surname><given-names>Fiona M</given-names></name><degrees>MA, MD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Chelala</surname><given-names>Claude</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Funston</surname><given-names>Garth</given-names></name><degrees>MB BChir, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib></contrib-group><aff id="aff1"><institution>Centre for Cancer Screening, Prevention, and Early Diagnosis, Wolfson Institute of Population Health, Queen Mary University of London</institution><addr-line>Charterhouse Square</addr-line><addr-line>London</addr-line><country>United Kingdom</country></aff><aff id="aff2"><institution>Department of Information Sciences and Technology, College of Engineering &#x0026; Computing, George Mason University</institution><addr-line>Fairfax</addr-line><addr-line>VA</addr-line><country>United States</country></aff><aff id="aff3"><institution>Barts Cancer Institute, Queen Mary University of London</institution><addr-line>London</addr-line><country>United Kingdom</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Coristine</surname><given-names>Andrew</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Garc&#x00ED;a-Barrag&#x00E1;n</surname><given-names>&#x00C1;lvaro</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Chrimes</surname><given-names>Dillon</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Adegoke</surname><given-names>Kola</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Alfred B Kayira, 
MSc, MPH, MRES, Centre for Cancer Screening, Prevention, and Early Diagnosis, Wolfson Institute of Population Health, Queen Mary University of London, Charterhouse Square, London, EC1M 6BQ, United Kingdom, 44 7415302686; <email>a.b.kayira@qmul.ac.uk</email></corresp><fn fn-type="equal" id="equal-contrib1"><label>*</label><p>these authors contributed equally</p></fn></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>14</day><month>5</month><year>2026</year></pub-date><volume>5</volume><elocation-id>e73481</elocation-id><history><date date-type="received"><day>05</day><month>03</month><year>2025</year></date><date date-type="rev-recd"><day>15</day><month>02</month><year>2026</year></date><date date-type="accepted"><day>16</day><month>02</month><year>2026</year></date></history><copyright-statement>&#x00A9; Alfred B Kayira, Hadeel R A Elyazori, Kevin Lybarger, Fiona M Walter, Claude Chelala, Garth Funston. Originally published in JMIR AI (<ext-link ext-link-type="uri" xlink:href="https://ai.jmir.org">https://ai.jmir.org</ext-link>), 14.5.2026. </copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR AI, is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://www.ai.jmir.org/">https://www.ai.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://ai.jmir.org/2026/1/e73481"/><abstract><sec><title>Background</title><p>Clinical notes are the most abundant data type within electronic health records; however, their highly unstructured format presents significant challenges for supervised natural language processing (NLP) methods. The NLP community is increasingly adapting large language models to analyze clinical notes, achieving strong performance and generalizability with minimal task-specific fine-tuning. We conducted a scoping review of NLP methods applied to clinical notes prior to widespread adoption of generative artificial intelligence (AI) to establish a pre&#x2013;large language model methodological baseline, showcase potential clinical utility, and highlight key challenges and limitations of extractive, supervised techniques that generative AI approaches may need to overcome.</p></sec><sec><title>Objective</title><p>This review aimed (1) to characterize the clinical notes used, (2) to identify NLP techniques used to analyze these notes, (3) to determine the clinical applications of NLP in cancer research and patient care, and (4) to highlight challenges and limitations of traditional pregenerative AI methods.</p></sec><sec sec-type="methods"><title>Methods</title><p>We systematically searched MEDLINE, Embase, Scopus, and Web of Science for English-language studies published from January 1, 2014, to March 8, 2024. Retrieved references were imported into Covidence, a web-based platform that streamlines management of reviews. 
Two authors (ABK and HRAE) independently screened studies for eligibility and extracted data using a predefined data extraction template.</p></sec><sec sec-type="results"><title>Results</title><p>A total of 226 studies were included in the review. Research using NLP to derive insights from clinical notes grew significantly, from 4 studies in 2014 to 43 in 2023. NLP methods have evolved from predominantly rule-based and ontology-driven approaches (2014-2017) to hybrid approaches that combine these with deep neural models such as Bidirectional Encoder Representations from Transformers (2018-2024). Most studies (161/226, 71.2%) developed their systems using small, single-institution datasets. Supervised learning approaches with manually annotated corpora were predominant (181/226, 80.1%). Most studies (174/226, 77%) focused on information extraction, with a few applying the extracted data to downstream tasks such as diagnostic and prognostic classification. Clinical domain pretrained models outperformed general domain pretrained models in the majority (11/16, 68.8%) of studies that evaluated multiple model types. In total, 25 studies compared their NLP-based systems with current practice in their respective clinical settings and reported potential benefits, including improved data coverage and completeness, faster information extraction, and improved classification or prediction accuracy. No studies evaluated the utility or impact of their systems in real-world clinical practice. The most common challenges reported by authors were restricted access to clinical notes (n=39) and limited data (n=18).</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>The application of NLP to clinical notes in oncology has expanded, but most studies focus on information extraction rather than downstream clinical tasks. 
Oncology NLP has the potential to advance cancer research and patient care, but barriers remain to robust evaluation and clinical deployment of promising tools. Emerging generative AI approaches will need to overcome these challenges to deliver real-world impact.</p></sec></abstract><kwd-group><kwd>natural language processing</kwd><kwd>clinical notes</kwd><kwd>electronic health records</kwd><kwd>clinical NLP challenges</kwd><kwd>cancer</kwd><kwd>scoping review</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><sec id="s1-1"><title>Background</title><p>Cancer is a major cause of morbidity and mortality globally [<xref ref-type="bibr" rid="ref1">1</xref>], with 19.3 million new cases and 10 million deaths reported in 2020 [<xref ref-type="bibr" rid="ref1">1</xref>]. Incidence is projected to rise by 55% by 2040 due to population growth and aging [<xref ref-type="bibr" rid="ref2">2</xref>]. Research leveraging real-world data is important to support prevention, early detection, and optimized treatment, and ultimately improve patient outcomes, including survival. Electronic health records (EHRs), digital profiles of patient histories created and managed by health care institutions, provide a valuable real-world data resource for cancer research and improve patient care.</p><p>While EHR systems have become increasingly available [<xref ref-type="bibr" rid="ref3">3</xref>], only a small portion consists of structured data (eg, clinical codes, vital signs, clinical and laboratory measurements, and demographics) that can be easily extracted and analyzed using conventional statistical and machine learning methods. Most data (80%) exist in unstructured forms, including clinical notes, diagnostic reports (eg, pathology and radiology), and images [<xref ref-type="bibr" rid="ref4">4</xref>], limiting usability [<xref ref-type="bibr" rid="ref5">5</xref>]. 
Natural language processing (NLP)&#x2014;a subfield of artificial intelligence (AI) that enables computers to understand, interpret, and generate human language&#x2014;offers a promising approach to unlock insights from unstructured clinical narratives such as clinical notes and diagnostic reports, enabling their use in research and patient care.</p><p>While both diagnostic reports and clinical notes contain valuable information, they differ in complexity for NLP. Diagnostic reports are typically formal and standardized, making them relatively straightforward to process. In contrast, clinical notes are highly diverse due to variations in recording practices across clinicians and health care institutions [<xref ref-type="bibr" rid="ref6">6</xref>]. They often feature incomplete sentences, poor punctuation, nonstandard abbreviations, shorthand, ambiguous terms, and spelling errors. These characteristics pose significant challenges for NLP processing, even with advanced methodological approaches such as pretrained language models (PLMs), for example, Bidirectional Encoder Representations from Transformers (BERT) [<xref ref-type="bibr" rid="ref7">7</xref>-<xref ref-type="bibr" rid="ref9">9</xref>], which dominated the general NLP domain since the introduction of the BERT model in 2018 [<xref ref-type="bibr" rid="ref10">10</xref>].</p><p>However, recent advances in generative AI are reshaping the field of clinical NLP. Large language models (LLMs)&#x2014;a subset of PLMs designed for generative tasks (eg, OpenAI&#x2019;s GPT [<xref ref-type="bibr" rid="ref11">11</xref>] and Meta&#x2019;s LLaMA [<xref ref-type="bibr" rid="ref12">12</xref>])&#x2014;are transforming clinical NLP by enabling broader generalization with minimal task-specific fine-tuning. 
LLMs (GPT-4, Gemma3-27B, and DeepSeek-14B), applied using prompt engineering or task-specific fine-tuning, have demonstrated strong performance in extracting treatment histories [<xref ref-type="bibr" rid="ref13">13</xref>], social and behavioral determinants of health (employment, housing, marital status, alcohol use, tobacco use, and drug use) [<xref ref-type="bibr" rid="ref14">14</xref>], and neurofibromatosis type 1&#x2013;relevant phenotypes [<xref ref-type="bibr" rid="ref15">15</xref>] from clinical notes. Recent review studies highlight increasing interest in the use of LLMs with prompt-based strategies, including zero-shot and few-shot prompting, for information extraction (IE) [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>], as well as for tasks such as information summarization, translation, and clinical communication [<xref ref-type="bibr" rid="ref18">18</xref>].</p><p>Given their strong early performance, which has generated considerable interest within the NLP community, LLMs may emerge as a dominant approach, potentially replacing traditional supervised deep learning methods (eg, recurrent neural networks [RNNs], convolutional neural networks [CNNs], and BERT-based models). To better understand the value that LLMs add beyond established NLP approaches, we conducted a scoping review of NLP methods applied to cancer clinical notes prior to the widespread use of generative AI, providing a comprehensive overview of pre-LLM methods, their potential clinical utility, and the limitations and challenges likely to extend to generative AI.</p><p>Several reviews have examined the application of NLP to clinical notes before the adoption of LLMs; however, none have specifically focused on clinical notes as the primary text. Prior reviews have included clinical notes only as a subset of broader document categories. 
Only 35% (43/123), 22% (5/23), and 12% (2/17) of studies included in Wang et al [<xref ref-type="bibr" rid="ref19">19</xref>], Li et al [<xref ref-type="bibr" rid="ref20">20</xref>], and Gholipour et al [<xref ref-type="bibr" rid="ref21">21</xref>], respectively, used clinical notes, often alongside other medical documents (eg, radiology and pathology reports). Sangariyavanich et al [<xref ref-type="bibr" rid="ref22">22</xref>] included 17 studies but did not specify the proportion or extent of clinical note use. Furthermore, these reviews focused on one NLP task or the other, for example, IE [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref21">21</xref>], diagnostic classification [<xref ref-type="bibr" rid="ref20">20</xref>], and prognostic classification [<xref ref-type="bibr" rid="ref22">22</xref>]. Broader reviews by Wang et al [<xref ref-type="bibr" rid="ref23">23</xref>], Sim et al [<xref ref-type="bibr" rid="ref24">24</xref>], and Sheikhalishahi et al [<xref ref-type="bibr" rid="ref25">25</xref>] covered studies, which included substantial volumes of clinical notes but were not cancer-specific, limiting their utility to the cancer domain. Additionally, these reviews only include studies published up to 2020, predating the widespread adoption of BERT-based PLMs. Notably, in Sheikhalishahi et al [<xref ref-type="bibr" rid="ref25">25</xref>], only 3 of the 106 studies used deep learning approaches.</p></sec><sec id="s1-2"><title>Objectives</title><p>This review provides a comprehensive synthesis of NLP applications to clinical notes in cancer research prior to widespread experimentation with LLMs. 
Unlike prior reviews that included studies based solely on structured diagnostic reports, we restricted inclusion to studies involving clinical notes (exclusively or in combination with diagnostic reports or other documents), so our findings more closely reflect the distinctive challenges&#x2014;including acquisition&#x2014;and methodological choices associated with this particularly complex text. We also diverge from earlier reviews by imposing no restrictions on the NLP task, allowing a broader characterization of cancer-related use cases beyond conventional diagnostic or prognostic classification.</p><p>By systematically analyzing pregenerative AI methodologies, this review provides important benchmarks for assessing the real &#x201C;value add&#x201D; of LLMs, highlights the limitations of extractive, supervised approaches, and anticipates challenges that may need to be overcome. Specifically, our objectives are (1) to characterize the clinical notes used in NLP studies, including their sources and properties; (2) to identify NLP techniques (including annotation methods) used to analyze these notes and examine how these methodologies have evolved over time; (3) to determine the clinical applications of NLP in cancer research and patient care, including reported clinical impact; and (4) to highlight the challenges encountered by researchers in the field.</p></sec></sec><sec id="s2" sec-type="methods"><title>Methods</title><p>This review follows the PRISMA-ScR (Preferred Reporting Items for Systematic Reviews and Meta-Analyses Extension for Scoping Reviews) [<xref ref-type="bibr" rid="ref26">26</xref>].</p><sec id="s2-1"><title>Working Definitions</title><p>We broadly defined NLP as the application of computational techniques to process and analyze unstructured clinical text. 
This encompasses a diverse range of methods, including domain-specific dictionaries, medical ontologies (eg, Unified Medical Language System [UMLS]), ontology-based tools (eg, MetaMap and Clinical Text Analysis and Knowledge Extraction System), handcrafted rules or search strings, rule-based tools (eg, ConText and NegEx), classical machine learning models (eg, support vector machine), neural networks (eg, RNN), PLMs (eg, BERT), and LLMs (a subset of PLMs distinguished by their larger parameter scale and enhanced capacity for broad generalization with minimal task-specific fine-tuning [eg, GPT]).</p><p>Clinical notes were defined as free-text narratives written by health care providers during patient encounters, documenting patient symptoms and signs, investigations, diagnoses, treatment, or treatment plans. They detail a patient&#x2019;s social and medical history, disease progression, and outcomes. They are distinguished from diagnostic reports, in that the latter provide results of diagnostic investigations or imaging studies, often objective and structured. Clinical notes may, however, contain descriptions and interpretations of diagnostic results from these reports.</p></sec><sec id="s2-2"><title>Search Strategy and Information Sources</title><p>We developed a three-concept search criterion covering (1) NLP, (2) EHR or electronic medical record, and (3) cancer or oncology. Predetermined key terms relating to these concepts were used to search MEDLINE through PubMed. These were further expanded by scanning the titles and abstracts of retrieved records. To avoid missing studies in which clinical notes were only one of several document types and therefore not mentioned in the title or abstract, we intentionally kept the EHR or electronic medical record concept broad. 
The final search criteria for all 4 databases are provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p><p>We searched MEDLINE (via PubMed), Embase, Web of Science, and Scopus for primary studies that applied NLP to process and analyze clinical notes to generate actionable information for cancer research or patient care. For PubMed, Embase, and Web of Science, we searched across all available fields. In Scopus, the search was limited to the title, abstract, and keywords fields. We used a mix of MeSH term mappings and exact phrase or term searching to balance the sensitivity and precision of the search. All searches were restricted to English-language publications from January 1, 2014, to March 8, 2024.</p><sec id="s2-2-1"><title>Inclusion Criteria</title><p>We included peer-reviewed journal papers and conference papers that (1) applied NLP to clinical notes&#x2014;either exclusively or in combination with other medical documents (eg, pathology, radiology, colonoscopy, or other imaging reports); (2) focused on any part of the cancer care continuum, including screening, diagnosis, staging, treatment, surveillance, outcomes assessment, and risk factor identification or risk stratification; and (3) were conducted in any clinical setting (eg, primary care, outpatient clinics, emergency departments, and hospitals).</p></sec><sec id="s2-2-2"><title>Exclusion Criteria</title><p>We excluded studies that used non-EHR documents (eg, patient-authored text in online health communities), studies using translated text (eg, from one language to English before applying NLP methods), reviews, editorials, commentaries, abstracts, letters, retracted papers, and veterinary studies.</p></sec></sec><sec id="s2-3"><title>Study Selection</title><p>Study screening (title or abstract and full text) was completed in Covidence (Veritas Health Innovation Ltd), a web-based collaboration software platform that streamlines the production of systematic and 
other literature reviews. References identified through database searches were imported into Covidence, and duplicates were automatically removed.</p><p>Two authors (ABK and HRAE) independently assessed the papers for eligibility based on the title and abstract. Proportionate agreement (the proportion of times that reviewers agree on their assessments) was 96%. Class-specific agreement was 56.2% for the positive (include) class and 97.9% for the negative (exclude) class. Cohen &#x03BA;, which measures the agreement between 2 reviewers (ABK and HRAE) adjusting for the possibility of agreement occurring by chance, was 0.54. Full-text papers were retrieved for studies that passed the title-abstract screening, and the same authors assessed the full texts for eligibility. Proportionate agreement was 81.5%. Class-specific agreement was 86.3% for the positive (include) class and 71.8% for the negative (exclude) class. Cohen &#x03BA; was 0.58. At both stages, discrepancies were discussed and resolved through consensus, with reference to the predefined inclusion or exclusion criteria and the operational definitions of key concepts (NLP, clinical notes, and cancer or oncology). When consensus could not be reached, another author (GF or KL) adjudicated.</p></sec><sec id="s2-4"><title>Data Extraction and Analysis</title><p>A data extraction template was created in Covidence and refined through several iterations until all authors agreed on the final version. Using this template, we extracted data across 37 predetermined variables, which can be classified into 5 categories: study metadata, clinical note characteristics, methods, applications, and challenges. Two authors (ABK and HRAE) extracted data from 10% of the papers. The extracted data were compared, and inconsistencies were discussed. Concordance was high and so the remaining papers were extracted by 1 reviewer (ABK). 
Extracted data were analyzed descriptively, providing counts and percentages.</p></sec><sec id="s2-5"><title>Study Quality Assessment</title><p>Given the scoping review methodology and our count-based analyses, a risk of bias or quality assessment was not performed [<xref ref-type="bibr" rid="ref27">27</xref>].</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Search Results</title><p><xref ref-type="fig" rid="figure1">Figure 1</xref> shows the study selection process used to arrive at the included studies. A total of 10,724 records were identified from the databases. After removing duplicates, 7964 records were screened. Of these, 7607 were excluded at the title and abstract screening stage. In the full-text screening stage, 357 papers were assessed for eligibility, and 131 were excluded. Ultimately, 226 studies met the inclusion criteria.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>PRISMA diagram illustrating the study selection process and reasons for exclusion. EHR: electronic health record; NLP: natural language processing; PRISMA: Preferred Reporting Items for Systematic Reviews and Meta-Analyses.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v5i1e73481_fig01.png"/></fig></sec><sec id="s3-2"><title>Distribution of Studies by Country</title><p><xref ref-type="fig" rid="figure2">Figure 2A</xref> illustrates the distribution of included studies based on the country of institution of affiliation of the major (first or corresponding) authors. The majority were from the United States (133/226, 58.8%), followed by China (20/226, 8.8%) and Spain (18/226, 8%).</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Characterization of included studies. (<bold>A</bold>) Distribution of studies by country of institution of affiliation of major authors. (<bold>B</bold>) Document type. (<bold>C</bold>) Clinical note type. 
(<bold>D</bold>) Language of clinical notes and other medical documents. (<bold>E</bold>) Heterogeneity of the sources of clinical notes and other medical documents. In total, 3 (1.3%) studies had insufficient information to determine the source of clinical notes. (<bold>F</bold>) Accessibility of data used by the studies (publicly available means that authors used a publicly available corpus [majority] or made their corpus publicly available). (<bold>G</bold>) Patient characteristics reported in studies. (<bold>H</bold>) Cancer types targeted by studies. CNS: central nervous system; nos: not otherwise specified.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v5i1e73481_fig02.png"/></fig></sec><sec id="s3-3"><title>Characterization of Clinical Notes</title><p><xref ref-type="fig" rid="figure2">Figure 2B</xref> shows the document types used across included studies. Out of 226 studies, 114 (50.4%) used clinical notes exclusively, while the remainder used clinical notes and other medical documents, primarily pathology and radiology reports. Progress notes (53/226, 23.5%), consultation notes (46/226, 20.4%), and discharge summaries (45/226, 19.9%) were the most common types of clinical notes used in included studies (<xref ref-type="fig" rid="figure2">Figure 2C</xref>). However, in 150 of the 226 (66.4%) studies, authors either used nonspecific terms to describe clinical notes (eg, oncology, urology, and cancer clinic notes) or did not specify the clinical note type. Most of the clinical notes were written in English (156/226, 69%), Spanish (22/226, 9.7%), and Chinese (18/226, 8%; <xref ref-type="fig" rid="figure2">Figure 2D</xref>).</p><p><xref ref-type="fig" rid="figure2">Figure 2E</xref> illustrates heterogeneity in the sources of clinical notes and other medical documents used in the studies. 
Most studies (161/226, 71.2%) used documents from a single institution, while 27.4% (62/226) included multi-institution data from the same country. No study used documents from more than 1 country. Regarding data availability, 128 of 226 (56.6%) studies did not provide any statement on the accessibility of the corpora used. A few studies (37/226, 16.4%) indicated that their corpus could be made available upon reasonable request, and 12.4% (28/226) either used publicly available corpora (majority) or made their corpus publicly accessible (<xref ref-type="fig" rid="figure2">Figure 2F</xref>).</p><p>Nearly half of the studies (110/226, 48.7%) did not provide any information about the characteristics of the patients associated with the clinical notes they used. When reported, common characteristics included age (96/226, 42.5%), sex or gender (71/226, 31.4%), race (56/226, 24.8%), cancer therapy or management (57/226, 25.2%), and cancer stage or metastasis (49/226, 21.7%; <xref ref-type="fig" rid="figure2">Figure 2G</xref>). The most commonly studied cancers were breast (65/226, 28.8%), lung (60/226, 26.5%), colorectal (32/226, 14.2%), and prostate (29/226, 12.8%; <xref ref-type="fig" rid="figure2">Figure 2H</xref>).</p></sec><sec id="s3-4"><title>NLP Publications and Methods Used by Calendar Year</title><p><xref ref-type="fig" rid="figure3">Figure 3</xref> illustrates the number of studies published annually from January 2014 to March 2024, along with the NLP methods applied to clinical notes. The number of publications per year increased from 4 in 2014 to 43 in 2023.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Model architectures used to analyze clinical notes over the years. Percentages are relative to the number of studies published in that year. The line graph depicts the number of published studies per year. *2024 is a partial year; it includes papers published from January 1, 2024, to March 8, 2024. 
It is common for researchers to use multiple methods from the same class or different classes (either as discrete models or in hybrid architectures), leading to double-counting. &#x201C;Pretrained language models&#x201D; refers to general-domain pretrained models (eg, BERT and GPT), while &#x201C;pretrained clinical models&#x201D; refers to models with domain-specific pretraining on clinical or biomedical text (eg, BioBERT, ClinicalBERT, and PubMedBERT). These categories are mutually exclusive. BERT: Bidirectional Encoder Representations from Transformers.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v5i1e73481_fig03.png"/></fig><p>NLP methods have evolved over time. Between 2014 and 2017, only ontologies, rule-based approaches, and discrete models were used. Studies using neural networks were first published in 2018, followed by PLMs in 2019 (<xref ref-type="fig" rid="figure3">Figure 3</xref>). While neural networks, including PLMs, have gained popularity since their introduction, ontologies, rule-based approaches, and discrete models remained the most prevalent approaches throughout the review period. However, rule-based approaches and ontologies were often used in hybrid workflows, serving specific preprocessing and postprocessing roles, rather than as standalone solutions. Out of 226 studies, only 7 (3.1%) and 27 (11.9%) exclusively used ontologies and rule-based methods, respectively.</p></sec><sec id="s3-5"><title>Fine-Grained Classification of NLP Methods</title><p>Ontologies were used in 87 of 226 (38.5%) studies, with domain-specific or customized dictionaries being the most common approach (42/87, 48.3%), followed by the UMLS at 41.4% (36/87; <xref ref-type="table" rid="table1">Table 1</xref>). These knowledge resources often supported machine learning and neural models by providing seed terms or domain expertise. 
Off-the-shelf tools such as MetaMap and Clinical Text Analysis and Knowledge Extraction System, which rely on UMLS mappings to analyze biomedical text, were also used.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Breakdown of methods used in included studies (N=226)<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup>.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Model architecture</td><td align="left" valign="bottom">Values (N=226), n (%)</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="2">Ontologies (n=87)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Domain-specific dictionary</td><td align="left" valign="top">42 (48.3)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Unified Medical Language System</td><td align="left" valign="top">36 (41.4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>MetaMap</td><td align="left" valign="top">16 (18.4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>cTAKES<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup></td><td align="left" valign="top">10 (11.5)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>NCBO<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup> BioPortal</td><td align="left" valign="top">7 (8)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>MedTagger</td><td align="left" valign="top">3 (3.4)</td></tr><tr><td align="left" valign="top"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Other</td><td align="left" valign="top">6 (6.9)</td></tr><tr><td align="left" valign="top" colspan="2">Rule-based (n=112)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Rules or RegEx<sup><xref ref-type="table-fn" rid="table1fn4">d</xref></sup></td><td align="left" valign="top">112 (100)</td></tr><tr><td align="left" valign="top" colspan="2">Discrete models (n=87)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Support vector machine</td><td align="left" valign="top">29 (33.3)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Trees</td><td align="left" valign="top">28 (32.2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Logistic regression</td><td align="left" valign="top">18 (20.7)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Conditional random fields</td><td align="left" valign="top">16 (18.4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Clustering</td><td align="left" valign="top">15 (17.2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Other</td><td align="left" valign="top">11 (12.6)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Naive Bayes classifier</td><td align="left" valign="top">5 (5.7)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>K-nearest neighbors 
classifier</td><td align="left" valign="top">3 (3.4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Linear regression</td><td align="left" valign="top">2 (2.3)</td></tr><tr><td align="left" valign="top" colspan="2">Neural networks (n=53)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Recurrent neural network</td><td align="left" valign="top">34 (64.2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Convolutional neural network</td><td align="left" valign="top">21 (39.6)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Feed forward neural networks</td><td align="left" valign="top">10 (18.9)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Capsule networks</td><td align="left" valign="top">1 (1.9)</td></tr><tr><td align="left" valign="top" colspan="2">Pretrained language models (n=41)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>BERT<sup><xref ref-type="table-fn" rid="table1fn5">e</xref></sup></td><td align="left" valign="top">39 (95.1)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>ChatGPT</td><td align="left" valign="top">1 (2.4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Google Bard</td><td align="left" valign="top">1 (2.4)</td></tr><tr><td align="left" valign="top" colspan="2">Pretrained clinical models (n=23)</td></tr><tr><td align="left" valign="top"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Clinical BERT</td><td align="left" valign="top">23 (100)</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>Number of model types per study: 91 (40.3%) studies used 1 model type, 92 (40.7%) studies used 2 model types, 26 (11.5%) studies used 3 model types, and 10 (4.4%) studies used 4 model types. Number of model subtypes per study: 76 (33.6%) studies used 1 model subtype, 72 (31.9%) studies used 2 model subtypes, 39 (17.3%) studies used 3 model subtypes, 25 (11.1%) studies used 4 model subtypes, and 4 (1.8%) studies used 5 model subtypes. Pretrained language models are general-domain pretrained models (eg, BERT and GPT), while pretrained clinical models are models pretrained on clinical or biomedical text (eg, BioBERT, ClinicalBERT, and PubMedBERT).</p></fn><fn id="table1fn2"><p><sup>b</sup>cTAKES: Clinical Text Analysis and Knowledge Extraction System. </p></fn><fn id="table1fn3"><p><sup>c</sup>NCBO: National Center for Biomedical Ontology. </p></fn><fn id="table1fn4"><p><sup>d</sup>RegEx: a rule-based algorithm for negation detection in clinical text. </p></fn><fn id="table1fn5"><p><sup>e</sup>BERT: Bidirectional Encoder Representations from Transformers. </p></fn></table-wrap-foot></table-wrap><p>Rule-based methods, including handcrafted rules and off-the-shelf tools such as clinical RegEx and ConText, were used in 112 of 226 (49.6%) studies (<xref ref-type="table" rid="table1">Table 1</xref>), making them the most prevalent, but rarely used in isolation. Rule-based approaches were used in 53 of 114 (46.5%) studies that analyzed clinical notes exclusively and in 64 of 112 (57.1%) studies that analyzed clinical notes in combination with other medical documents. 
Although the latter proportion was 10.6 percentage points higher, this difference was not statistically significant (2-proportion <italic>z</italic> test: <italic>z</italic>=&#x2212;1.60; <italic>P</italic>=.11).</p><p>Discrete models, encompassing classical machine learning and statistical methods, were used in 87 of 226 (38.5%) studies (<xref ref-type="table" rid="table1">Table 1</xref>). The most common approaches under this category included support vector machines (29/87, 33.3%), tree-based models including random forest (28/87, 32.2%), logistic regression (18/87, 20.7%), conditional random fields (16/87, 18.4%), and clustering algorithms (15/87, 17.2%). Conditional random field was often applied as a classification layer in neural models like long short-term memory and CNN.</p><p>Neural networks featured in 53 of 226 (23.5%) studies, with RNN (34/53, 64.2%) and CNN (21/53, 39.6%) being the most popular in this category (<xref ref-type="table" rid="table1">Table 1</xref>). RNNs were dominated by long short-term memory architectures.</p><p>PLMs were used in 41 of 226 (18.1%) studies. These were primarily BERT-based models, with only 2 of the 41 (4.9%) studies [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref29">29</xref>] using LLMs (ChatGPT and Google Bard; <xref ref-type="table" rid="table1">Table 1</xref>). Pretrained clinical models&#x2014;BERT-based models pretrained on clinical or biomedical corpora (eg, Bio_ClinicalBERT)&#x2014;were used in 23 of 226 (10.2%) studies (<xref ref-type="table" rid="table1">Table 1</xref>). Among 23 studies that implemented pretrained clinical models, 16 compared clinical domain pretrained models to general domain pretrained models. 
Clinical domain models outperformed general domain models in 11 of 16 (68.8%) studies, while general domain models performed better in the remaining 5 studies (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>).</p></sec><sec id="s3-6"><title>Methods for Non-English Corpora</title><p>Out of 226 studies, 70 (31%) developed models for non-English clinical notes. Of these, 59 (84.3%) implemented language-specific pipelines built from rules and classical machine learning with engineered features, including some hybrid combinations. Pretrained approaches were present but less common and not mutually exclusive across studies: language-specific pretrained models in 11 of 70 (15.7%) studies, multilingual pretrained models in 7 of 70 (10%) studies, language-specific biomedical or clinical pretrained models in 6 of 70 (8.6%) studies, and language-adapted models in 3 of 70 (4.3%) studies. Language-adapted models typically consisted of models pretrained in English and then further trained on the target language (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>).</p><p>In total, 12 studies compared multiple model families. Language-specific biomedical or clinical pretrained models most often yielded the best performance (n=4) [<xref ref-type="bibr" rid="ref30">30</xref>-<xref ref-type="bibr" rid="ref33">33</xref>], followed by language-specific pretrained models (n=3) [<xref ref-type="bibr" rid="ref34">34</xref>-<xref ref-type="bibr" rid="ref36">36</xref>] and language-adapted pretrained models (n=2) [<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref38">38</xref>]. 
In the remaining 3 studies, the best-performing models were a biomedical or clinical pretrained model [<xref ref-type="bibr" rid="ref39">39</xref>], a language-specific model [<xref ref-type="bibr" rid="ref40">40</xref>], and a multilingual pretrained model [<xref ref-type="bibr" rid="ref41">41</xref>].</p></sec><sec id="s3-7"><title>Text Representation Methods</title><p><xref ref-type="fig" rid="figure4">Figure 4</xref> illustrates the text representation and vectorization methods used in the studies. Out of 226 studies, 120 (53.1%) used at least 1 representation method. From 2015 to 2017, statistical methods including bag of words, n-grams, and term frequency-inverse document frequency were prevalent. In 2018, context-free embeddings (one fixed vector for each word or token regardless of the context in which it is used, eg, Word2Vec, GloVe, and FastText) and contextual embeddings (a new vector assigned to each word or token depending on the surrounding context, eg, BERT and GPT) were introduced and became the predominant approaches. It was common for studies to test multiple embedding methods to identify the best-performing approaches.</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Text representation and embedding methods (n=120). Context-free embeddings include Word2Vec, FastText, and GloVe. N-grams include continuous bag of words, skip-gram, bigrams, and trigrams. Groups are not mutually exclusive&#x2014;studies may appear in more than 1 category. TF-IDF: term frequency-inverse document frequency.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v5i1e73481_fig04.png"/></fig></sec><sec id="s3-8"><title>Size of Labeled Data Used to Train and Evaluate NLP Systems</title><p><xref ref-type="fig" rid="figure5">Figure 5</xref> shows the data size (clinical notes, with or without additional medical documents, and patients) used to train and evaluate NLP systems. 
The median number of documents per partition was fewer than 1000, and the median number of patients associated with these notes was also under 1000. For example, the median number of training documents, test documents, training patients, and test patients was 838 (IQR 439-3905), 300 (IQR 120-1504), 606 (IQR 202-1337), and 231.5 (IQR 86-599), respectively.</p><p>Training and test sets were generally created through random splits, except in 4 studies where the test cohort came from a slightly different patient population (prospective palliative radiation cohort vs metastatic cancer retrospective registry&#x2013;based cohort) [<xref ref-type="bibr" rid="ref42">42</xref>], a different but overlapping time period with the training cohort [<xref ref-type="bibr" rid="ref43">43</xref>], a different nonoverlapping time period with the training cohort [<xref ref-type="bibr" rid="ref44">44</xref>], or where the test cohort had a shorter follow-up time than the training cohort (4 vs 5 years) [<xref ref-type="bibr" rid="ref45">45</xref>].</p><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>Size of the data used in model development and evaluation. Documents refer to entire clinical notes or reports or sentences (a small number of studies reported corpus size in sentences). To cater for instances where train or test split was not specified, we report total data sums (ie, all documents and all patients) as provided by the authors. The number below each boxplot indicates the count of studies reporting data size in that category.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v5i1e73481_fig05.png"/></fig></sec><sec id="s3-9"><title>Annotation Methods for Reference Corpus</title><p>The majority of studies (181/226, 80.1%) trained and evaluated their systems on corpora that were manually annotated by humans. 
Few studies (7/226, 3.1%) trained models using weakly supervised labels but evaluated them on human-curated labels. A considerable proportion of studies (38/226, 16.8%) either relied on existing labels within the EHR (eg, <italic>International Classification of Diseases</italic> or ICD codes) or developed unsupervised systems, for which manual annotation was not applicable. A summary of annotation methods is provided in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>.</p></sec><sec id="s3-10"><title>Implementation Type and Evaluation</title><p><xref ref-type="table" rid="table2">Table 2</xref> summarizes model implementation type, evaluation metrics, and whether models were externally evaluated. Most studies (179/226, 79.2%) developed new models or retrained or fine-tuned an existing one, while 19.5% (44/226) used existing models without retraining. The latter group included studies that used off-the-shelf tools such as MetaMap or repurposed existing models for new extraction tasks.</p><p>Evaluation metrics varied by task, with the most commonly reported being recall (155/226, 68.6%), precision (153/226, 67.7%), <italic>F</italic><sub>1</sub>-score (136/226, 60.2%), accuracy (44/226, 19.5%), area under the receiver operating characteristic curve (40/226, 17.7%), and specificity (30/226, 13.3%). While metrics such as recall, precision, and <italic>F</italic><sub>1</sub>-score were widely used and therefore suitable for summarization, variability in clinical corpora and tasks precluded comparison on NLP methods. 
Only 21 of 226 (9.3%) studies evaluated their systems on external corpora.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Model implementation and evaluation (N=226)<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup>.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Implementation or evaluation</td><td align="left" valign="bottom">Values, n (%)</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="2">Implementation type</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>New model</td><td align="left" valign="top">179 (79.2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Existing model</td><td align="left" valign="top">44 (19.5)</td></tr><tr><td align="left" valign="top" colspan="2">Reported evaluation metrics</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Recall</td><td align="left" valign="top">155 (68.6)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Precision</td><td align="left" valign="top">153 (67.7)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><italic>F</italic><sub>1</sub>-score</td><td align="left" valign="top">136 (60.2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Accuracy</td><td align="left" valign="top">44 (19.5)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>AUC-ROC<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup></td><td align="left" valign="top">40 (17.7)</td></tr><tr><td 
align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Specificity</td><td align="left" valign="top">30 (13.3)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Cohen &#x03BA;</td><td align="left" valign="top">5 (2.2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Cosine similarity</td><td align="left" valign="top">3 (1.3)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Mean average precision</td><td align="left" valign="top">2 (0.9)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Other</td><td align="left" valign="top">44 (19.5)</td></tr><tr><td align="left" valign="top" colspan="2">External evaluation</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Yes</td><td align="left" valign="top">21 (9.3)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>No</td><td align="left" valign="top">158 (69.9)</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>Some studies lacked sufficient information to assess external evaluation; for example, those that used existing tools had their detailed data documented elsewhere.</p></fn><fn id="table2fn2"><p><sup>b</sup>AUC-ROC: area under the receiver operating characteristic curve.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-11"><title>Clinical Applications of NLP</title><p><xref ref-type="fig" rid="figure6">Figure 6A</xref> summarizes the clinical applications of NLP to clinical notes. IE was the most common task, with 77% (174/226) of the studies. 
In 50.9% (115/226) of the studies, NLP was exclusively used for IE. Diagnostic classification was performed in 62 of 226 (27.4%) studies, while trials or cohort matching was the goal in 16 of 226 (7.1%) studies. Other notable applications included prognostic classification (n=14), concept normalization (n=14), and topic modeling (n=11). It was not uncommon, however, for a study to undertake multiple tasks, often with the output of one task feeding into subsequent tasks.</p><fig position="float" id="figure6"><label>Figure 6.</label><caption><p>NLP clinical applications with clinical notes. (<bold>A</bold>) Number of studies per clinical application. (<bold>B</bold>) Number of clinical applications per year (percentages are relative to the number of papers published in that year). Diagnostic classification refers to document-level or patient-level classification tasks, for example, distinguishing between notes with metastasis and those without metastasis. Prognostic classification refers to predicting that some clinical event of interest will occur within a specified time period in the future, for example, lung cancer recurrence 2 years following lobectomy. NLP: natural language processing.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v5i1e73481_fig06.png"/></fig><p>A subset of studies (n=15) that focused on IE also extracted temporal information. Some studies formulated this task as a document-time relation (DocTimeRel) classification, where events were assigned a temporal relation to the document creation time (before, after, overlap, or before or overlap) [<xref ref-type="bibr" rid="ref46">46</xref>-<xref ref-type="bibr" rid="ref48">48</xref>]. 
Others used an event-date relation classification formulation, classifying event-time pairs as before, after, overlap, or before or overlap [<xref ref-type="bibr" rid="ref49">49</xref>] or directly linking events to their corresponding dates through contextual pairing [<xref ref-type="bibr" rid="ref50">50</xref>,<xref ref-type="bibr" rid="ref51">51</xref>]. One study constructed patient-level temporal timelines by assigning events to coarse temporal bins (way before admission, before admission, admission, after admission, and discharge) and then temporally ordered them within and across documents [<xref ref-type="bibr" rid="ref52">52</xref>]. Less complex approaches included proximity- or context-based methods (linking events to nearby date mentions using dependency parsing and rule-based contextual heuristics) [<xref ref-type="bibr" rid="ref53">53</xref>-<xref ref-type="bibr" rid="ref58">58</xref>] or simply classifying identified events into broad temporal categories such as current, history, future, or unknown [<xref ref-type="bibr" rid="ref59">59</xref>,<xref ref-type="bibr" rid="ref60">60</xref>].</p><p><xref ref-type="fig" rid="figure6">Figure 6B</xref> shows the evolution of clinical NLP applications over time. IE remained the predominant task throughout the years, followed by diagnostic classification. Newer applications introduced after 2018 include concept normalization, prognostic classification, and topic modeling. Task chaining, where the output of one task is used as the input for downstream tasks, was common in studies that went beyond IE. For example, in 2014, there were 4 publications of NLP applied to clinical notes. All 4 (100%) studies used NLP to extract information of some kind, 2 (50%) studies used the extracted information to match patients to clinical trials, 1 (25%) study used the extracted information for diagnostic classification, and 1 (25%) study had IE as the end point. 
Discrete models were almost exclusively used for these downstream tasks.</p></sec><sec id="s3-12"><title>System Deployment Stage and Clinical Impact</title><p>Of the 226 reviewed studies, 224 (99.1%) developed proof-of-concept systems that were evaluated only in research settings rather than deployed in routine clinical practice. One study piloted their system in clinical practice [<xref ref-type="bibr" rid="ref61">61</xref>], while another described the use of an NLP-based system that had just been integrated in clinical use [<xref ref-type="bibr" rid="ref62">62</xref>].</p><p>Since most studies were evaluated only as research implementations (ie, no real-world deployments), clinical impact was not evaluated. However, 25 of the 226 (11.1%) studies compared their systems with current practice in their research implementation. These studies reported benefits such as improved data coverage (identified more patients with the relevant attribute than from structured data alone) and completeness (curated further variables not available as structured data) [<xref ref-type="bibr" rid="ref63">63</xref>-<xref ref-type="bibr" rid="ref75">75</xref>], taking less time to extract relevant information [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref61">61</xref>,<xref ref-type="bibr" rid="ref76">76</xref>-<xref ref-type="bibr" rid="ref82">82</xref>], fewer clinician man-hours for certain tasks (eg, fewer clinicians needed to complete clinical audits) [<xref ref-type="bibr" rid="ref29">29</xref>], and higher classification or prediction accuracy compared with human experts or existing methods [<xref ref-type="bibr" rid="ref76">76</xref>,<xref ref-type="bibr" rid="ref83">83</xref>,<xref ref-type="bibr" rid="ref84">84</xref>]. 
One study that described an IE system in routine use [<xref ref-type="bibr" rid="ref62">62</xref>] focused on characterizing use patterns, including which clinical specialties used the system and for what purposes.</p></sec><sec id="s3-13"><title>Challenges and Limitations Reported by the Authors</title><p><xref ref-type="table" rid="table3">Table 3</xref> details challenges and limitations faced by researchers applying different NLP techniques to clinical notes. Common challenges were single-institution corpora (39/226, 17.3%), limited data (18/226, 8%), incomplete EHR data (14/226, 6.2%), label imbalance (12/226, 5.3%), rules or dictionary not comprehensive or generalizable (9/226, 4%), and word sense and abbreviation disambiguation (6/226, 2.7%). Overall, authors reported a range of challenges, some unique to the task, corpora, or methodological approach.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Challenges and limitations reported in studies<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup>.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Challenge or limitation</td><td align="left" valign="bottom">Values, n (%)</td></tr></thead><tbody><tr><td align="left" valign="top">Single institution corpus [<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref61">61</xref>,<xref ref-type="bibr" rid="ref70">70</xref>,<xref ref-type="bibr" rid="ref72">72</xref>,<xref ref-type="bibr" rid="ref73">73</xref>,<xref ref-type="bibr" rid="ref75">75</xref>,<xref ref-type="bibr" rid="ref77">77</xref>,<xref ref-type="bibr" rid="ref79">79</xref>,<xref ref-type="bibr" rid="ref82">82</xref>,<xref ref-type="bibr" rid="ref84">84</xref>-<xref ref-type="bibr" rid="ref111">111</xref>]</td><td align="left" valign="top">39 (17.3)</td></tr><tr><td align="left" valign="top">Limited data 
[<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref50">50</xref>,<xref ref-type="bibr" rid="ref57">57</xref>,<xref ref-type="bibr" rid="ref61">61</xref>,<xref ref-type="bibr" rid="ref62">62</xref>,<xref ref-type="bibr" rid="ref65">65</xref>,<xref ref-type="bibr" rid="ref79">79</xref>,<xref ref-type="bibr" rid="ref90">90</xref>,<xref ref-type="bibr" rid="ref92">92</xref>,<xref ref-type="bibr" rid="ref104">104</xref>,<xref ref-type="bibr" rid="ref111">111</xref>-<xref ref-type="bibr" rid="ref117">117</xref>]</td><td align="left" valign="top">18 (8)</td></tr><tr><td align="left" valign="top">Incomplete recording in the EHR<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup> [<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref57">57</xref>,<xref ref-type="bibr" rid="ref74">74</xref>,<xref ref-type="bibr" rid="ref78">78</xref>,<xref ref-type="bibr" rid="ref81">81</xref>,<xref ref-type="bibr" rid="ref94">94</xref>,<xref ref-type="bibr" rid="ref98">98</xref>,<xref ref-type="bibr" rid="ref103">103</xref>,<xref ref-type="bibr" rid="ref109">109</xref>,<xref ref-type="bibr" rid="ref118">118</xref>-<xref ref-type="bibr" rid="ref122">122</xref>]</td><td align="left" valign="top">14 (6.2)</td></tr><tr><td align="left" valign="top">Label imbalance [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref73">73</xref>,<xref ref-type="bibr" rid="ref82">82</xref>,<xref ref-type="bibr" rid="ref98">98</xref>,<xref ref-type="bibr" rid="ref102">102</xref>,<xref ref-type="bibr" rid="ref104">104</xref>,<xref ref-type="bibr" rid="ref123">123</xref>-<xref ref-type="bibr" rid="ref126">126</xref>]</td><td align="left" valign="top">12 (5.3)</td></tr><tr><td align="left" valign="top">Negation detection and resolution [<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref74">74</xref>,<xref ref-type="bibr" 
rid="ref97">97</xref>,<xref ref-type="bibr" rid="ref119">119</xref>,<xref ref-type="bibr" rid="ref126">126</xref>-<xref ref-type="bibr" rid="ref131">131</xref>]</td><td align="left" valign="top">10 (4.4)</td></tr><tr><td align="left" valign="top">Dictionary or rules not comprehensive or generalizable [<xref ref-type="bibr" rid="ref65">65</xref>,<xref ref-type="bibr" rid="ref66">66</xref>,<xref ref-type="bibr" rid="ref92">92</xref>,<xref ref-type="bibr" rid="ref119">119</xref>,<xref ref-type="bibr" rid="ref120">120</xref>,<xref ref-type="bibr" rid="ref132">132</xref>-<xref ref-type="bibr" rid="ref135">135</xref>]</td><td align="left" valign="top">9 (4)</td></tr><tr><td align="left" valign="top">Word sense or abbreviation disambiguation [<xref ref-type="bibr" rid="ref48">48</xref>,<xref ref-type="bibr" rid="ref130">130</xref>,<xref ref-type="bibr" rid="ref131">131</xref>,<xref ref-type="bibr" rid="ref136">136</xref>-<xref ref-type="bibr" rid="ref138">138</xref>]</td><td align="left" valign="top">6 (2.7)</td></tr><tr><td align="left" valign="top">Variability in terminology used to describe the same concept [<xref ref-type="bibr" rid="ref78">78</xref>,<xref ref-type="bibr" rid="ref120">120</xref>,<xref ref-type="bibr" rid="ref136">136</xref>,<xref ref-type="bibr" rid="ref139">139</xref>]</td><td align="left" valign="top">4 (1.8)</td></tr><tr><td align="left" valign="top">Spelling errors or typos [<xref ref-type="bibr" rid="ref90">90</xref>,<xref ref-type="bibr" rid="ref130">130</xref>,<xref ref-type="bibr" rid="ref137">137</xref>,<xref ref-type="bibr" rid="ref140">140</xref>]</td><td align="left" valign="top">4 (1.8)</td></tr><tr><td align="left" valign="top">Imbalanced data [<xref ref-type="bibr" rid="ref57">57</xref>,<xref ref-type="bibr" rid="ref102">102</xref>,<xref ref-type="bibr" rid="ref105">105</xref>,<xref ref-type="bibr" rid="ref141">141</xref>]</td><td align="left" valign="top">4 (1.8)</td></tr><tr><td align="left" valign="top">Use of speculative language 
[<xref ref-type="bibr" rid="ref117">117</xref>,<xref ref-type="bibr" rid="ref128">128</xref>,<xref ref-type="bibr" rid="ref136">136</xref>]</td><td align="left" valign="top">3 (1.3)</td></tr><tr><td align="left" valign="top">Use of nonstandard terminology [<xref ref-type="bibr" rid="ref90">90</xref>,<xref ref-type="bibr" rid="ref128">128</xref>,<xref ref-type="bibr" rid="ref142">142</xref>]</td><td align="left" valign="top">3 (1.3)</td></tr><tr><td align="left" valign="top">Rarity of concepts of interest [<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref143">143</xref>]</td><td align="left" valign="top">3 (1.3)</td></tr><tr><td align="left" valign="top">Institutional differences in documentation style or note structure [<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref81">81</xref>,<xref ref-type="bibr" rid="ref117">117</xref>]</td><td align="left" valign="top">3 (1.3)</td></tr><tr><td align="left" valign="top">Quality of human annotations [<xref ref-type="bibr" rid="ref51">51</xref>,<xref ref-type="bibr" rid="ref80">80</xref>]</td><td align="left" valign="top">2 (0.9)</td></tr><tr><td align="left" valign="top">Multilingualism in text [<xref ref-type="bibr" rid="ref79">79</xref>,<xref ref-type="bibr" rid="ref128">128</xref>]</td><td align="left" valign="top">2 (0.9)</td></tr><tr><td align="left" valign="top">Temporal reasoning (current vs historical events) [<xref ref-type="bibr" rid="ref129">129</xref>,<xref ref-type="bibr" rid="ref138">138</xref>]</td><td align="left" valign="top">2 (0.9)</td></tr><tr><td align="left" valign="top">Short notes or sentences (insufficient context for context-dependent models) [<xref ref-type="bibr" rid="ref72">72</xref>,<xref ref-type="bibr" rid="ref144">144</xref>]</td><td align="left" valign="top">2 (0.9)</td></tr><tr><td align="left" valign="top">Model computationally expensive [<xref ref-type="bibr" rid="ref38">38</xref>]</td><td 
align="left" valign="top">1 (0.4)</td></tr><tr><td align="left" valign="top">Distant (intersentence) relations [<xref ref-type="bibr" rid="ref124">124</xref>]</td><td align="left" valign="top">1 (0.4)</td></tr><tr><td align="left" valign="top">Frequency of co-occurrence of unrelated concepts [<xref ref-type="bibr" rid="ref143">143</xref>]</td><td align="left" valign="top">1 (0.4)</td></tr><tr><td align="left" valign="top">Long execute-response time [<xref ref-type="bibr" rid="ref145">145</xref>]</td><td align="left" valign="top">1 (0.4)</td></tr><tr><td align="left" valign="top">Very long documents (&#x003E;512 token limit for BERT<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup>-based models) [<xref ref-type="bibr" rid="ref125">125</xref>]</td><td align="left" valign="top">1 (0.4)</td></tr><tr><td align="left" valign="top">Significant n-gram method insensitive to evolution of patient&#x2019;s notes over time and between patients [<xref ref-type="bibr" rid="ref146">146</xref>]</td><td align="left" valign="top">1 (0.4)</td></tr><tr><td align="left" valign="top">Resolution of patient and nonpatient references [<xref ref-type="bibr" rid="ref97">97</xref>]</td><td align="left" valign="top">1 (0.4)</td></tr><tr><td align="left" valign="top">Nonstandard date formats [<xref ref-type="bibr" rid="ref57">57</xref>]</td><td align="left" valign="top">1 (0.4)</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>Negation detection and resolution includes detecting the negation itself, distant negations, and resolving the scope of the negation. Limited data encompass the following: small corpus, only a small number of patients associated with those notes, and small annotated or labeled notes for model development and evaluation. Imbalanced data refer to instances where notes are overrepresented by text from one patient group (eg, private insurance vs noninsured). 
Label imbalance is when one label of interest (eg, a certain biomarker) is more prevalent in the notes, hence, easily learned by the model at the expense of other labels (biomarkers). Quality of human annotations is where human annotated corpora for model training and evaluation are erroneous.</p></fn><fn id="table3fn2"><p><sup>b</sup>EHR: electronic health record.</p></fn><fn id="table3fn3"><p><sup>c</sup>BERT: Bidirectional Encoder Representations from Transformers.</p></fn></table-wrap-foot></table-wrap></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Summary of Main Findings</title><p>Research applying NLP to clinical notes in the cancer domain grew substantially during the review period, rising from 4 publications in 2014 to 43 in 2023, likely driven by the increasing availability of digital records and advances in scalable NLP methods. However, most studies relied on English language (156/226, 69%) and single-institution (161/226, 71.2%) datasets. The majority of studies originated from the United States (133/226, 58.8%), which aligns with trends in clinical NLP publishing in which the United States dominates [<xref ref-type="bibr" rid="ref147">147</xref>]. Almost half of the studies (110/226, 48.7%) provided no information on the characteristics of patients whose clinical notes were used, while 56.6% (128/226) did not provide a statement on data sharing, limiting interpretability and reproducibility. The most commonly studied cancers (breast, lung, colorectal, and prostate) likely reflect their prevalence in the United States and hence dedicated EHR systems, which in turn increases the availability of clinical notes.</p><p>NLP methods for processing clinical notes evolved from exclusively ontology-based, rule-based, and discrete models (2014&#x2010;2017) to hybrid approaches incorporating neural networks and PLMs such as BERT (2018&#x2010;2024). 
Only a few studies applied LLMs, with publications starting from October 2023. Contextual embeddings have become increasingly prevalent, reflecting the wider adoption of pretrained models. Most studies used small single-institution datasets (&#x003C;1000 documents or &#x003C;1000 patients), likely due to challenges in accessing clinical notes. Annotation methods were mostly manual. A subanalysis of non-English corpora studies showed that the majority (59/70, 84.3%) implemented language-specific, nonpretrained models. Domain-specific pretrained clinical models were superior to other model types in the majority (11/16, 68.8%) of studies across both English and non-English corpora. Only 9.3% (21/226) of studies evaluated their systems on external datasets.</p><p>Most studies (174/226, 77%) focused on IE. A subset of these used the extracted information in downstream tasks, but the majority (115/226, 50.9%) focused solely on IE. In total, 15 studies extracted temporal information from clinical notes using various approaches, including DocTimeRel classification, event-time relation classification, and proximity- or context-based methods. No studies evaluated clinical impact following implementation, but several studies compared their systems to current practice in their respective settings (eg, manual review of notes in clinical audits) and demonstrated potential clinical utility. The most common challenge in clinical NLP was restricted access to sufficient clinical notes, reported by 17.3% (39/226) of studies.</p></sec><sec id="s4-2"><title>Evolution of NLP Methods for Clinical Notes</title><p>NLP methods for clinical notes have become more diverse over time. While new deep learning&#x2013;based techniques have gained popularity, they have largely complemented rather than replaced traditional methods such as rules and ontologies, resulting in widespread adoption of hybrid architectures. 
Prior reviews that included substantial volumes of clinical notes reported similar findings, namely, the predominance of rule-based methods alongside increasing use of hybrid architectures that combine rules with machine learning or neural networks [<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref25">25</xref>]. However, a review of NLP applied to diagnostic (radiology) reports reported slightly different findings, with rule-based and classical machine learning methods being prevalent but often used as baselines against which deep learning approaches were compared [<xref ref-type="bibr" rid="ref148">148</xref>].</p><p>The continued use of rule-based approaches for clinical notes likely reflects the unique challenges posed by these documents, which often require substantial preprocessing before neural models can be applied, as well as postprocessing to structure model outputs into clinically meaningful formats. The overall prevalence of rule-based methods may also partly reflect the inclusion of semistructured diagnostic reports, which&#x2014;owing to their templated design and restricted, domain-specific vocabulary&#x2014;are generally more amenable to rule-based processing [<xref ref-type="bibr" rid="ref149">149</xref>]. Combining knowledge resources with deep neural models, on the other hand, may reflect authors&#x2019; efforts to enhance the explainability of predictions made by these complex networks, given the importance of explainability in health care AI [<xref ref-type="bibr" rid="ref150">150</xref>,<xref ref-type="bibr" rid="ref151">151</xref>] and evidence from prior work that integrating knowledge into deep learning may improve explainability [<xref ref-type="bibr" rid="ref152">152</xref>].</p><p>Text representation methods have evolved alongside machine learning models. 
Earlier NLP approaches commonly relied on discrete word representations, such as term frequency-inverse document frequency and n-grams [<xref ref-type="bibr" rid="ref153">153</xref>]. Our review shows that context-free word embeddings (eg, Word2Vec, GloVe, and FastText) were the most widely used, typically with classical machine learning models. The results also suggest that these approaches are increasingly being complemented or replaced by contextual embeddings derived from transformer-based models, which represent words as vectors that capture richer semantic and syntactic relationships.</p></sec><sec id="s4-3"><title>Trends in NLP Clinical Applications</title><p>NLP applications to clinical notes focused predominantly on IE, accounting for over three-quarters of included studies, with comparatively limited use in downstream clinical decision-making tasks. This emphasis reflects both the pragmatic advantages and the perceived safety of IE. By structuring free-text data into clinically meaningful variables, IE enables expert oversight, produces interpretable intermediate outputs, and supports a broad range of secondary applications, including diagnostic or prognostic modeling, cohort identification, and decision support [<xref ref-type="bibr" rid="ref154">154</xref>]. In contrast, approaches that predict outcomes directly from unstructured text without an explicit IE step are often less transparent, constrain the incorporation of domain knowledge, and are typically optimized for a single task [<xref ref-type="bibr" rid="ref155">155</xref>].</p></sec><sec id="s4-4"><title>Potential Clinical Impact of NLP</title><p>Although none of the included studies evaluated the direct clinical impact of NLP systems on patient care following research implementation, several studies compared their systems with current clinical practice as part of their evaluation. 
These comparisons demonstrated the potential of NLP to support tasks such as IE, clinical auditing, and diagnostic or prognostic classification. However, most studies (161/226, 71.2%) relied on small, single-institution datasets, raising concerns about generalizability, as such models often perform less well when applied to more representative or external datasets due to differences in both population characteristics and data structure. Without extensive evaluation across diverse datasets, there remains limited evidence of real-world effectiveness, thereby impeding adoption into routine clinical use.</p><p>Beyond technical performance, the application of NLP systems to high-risk tasks, such as cancer diagnosis or risk prediction, is subject to stringent regulatory oversight as medical devices [<xref ref-type="bibr" rid="ref156">156</xref>,<xref ref-type="bibr" rid="ref157">157</xref>]. These regulatory requirements, together with challenges in integrating NLP systems into existing clinical workflows [<xref ref-type="bibr" rid="ref158">158</xref>], further hinder translation into routine clinical care and help explain the limited real-world impact observed across studies.</p></sec><sec id="s4-5"><title>Challenges and Opportunities in Advancing Clinical NLP</title><p>Our findings indicate that restricted access to clinical data remains the dominant barrier in oncology NLP. 
Access to clinical corpora is complicated by multiple barriers, including national data protection regulations governing privacy and confidentiality (eg, the General Data Protection Regulation [<xref ref-type="bibr" rid="ref159">159</xref>] in the European Union and the Health Insurance Portability and Accountability Act [<xref ref-type="bibr" rid="ref160">160</xref>] in the United States), additional institutional governance restrictions imposed to mitigate disclosure risk and legal liability [<xref ref-type="bibr" rid="ref161">161</xref>], and technical obstacles such as EHR interoperability [<xref ref-type="bibr" rid="ref161">161</xref>]. This is compounded by limited data sharing practices, with many studies providing no clear data availability statement or listing data as &#x201C;available on reasonable request,&#x201D; a practice that often creates substantial practical barriers, including low response rates and protracted negotiations that effectively limit access. As a result, researchers have to rely on small, single-institution datasets, resulting in proof-of-concept systems with limited generalizability.</p><p>Limited data accessibility undermines reproducibility, hinders meaningful comparison across studies, prevents the establishment of standardized benchmarks for performance evaluation, and reinforces reliance on small, single-institution datasets. Collectively, these challenges derail real-world deployment of clinical NLP systems.</p><p>Several methodological approaches have attempted to mitigate data scarcity, each with notable limitations. Transfer learning through clinical PLMs (eg, ClinicalBERT) is constrained by training on relatively small and institutionally narrow corpora, reflecting the same access limitations they aim to overcome, which can result in suboptimal performance on downstream tasks [<xref ref-type="bibr" rid="ref162">162</xref>,<xref ref-type="bibr" rid="ref163">163</xref>]. 
Publicly available deidentified datasets curated for clinical NLP shared tasks (eg, Cancer Text Mining Shared Task [<xref ref-type="bibr" rid="ref164">164</xref>]) face similar limitations, being small and single-center.</p><p>More recently, LLMs have shown promise in mitigating data scarcity by enabling zero-shot or few-shot learning, thereby reducing dependence on large, manually annotated corpora [<xref ref-type="bibr" rid="ref165">165</xref>,<xref ref-type="bibr" rid="ref166">166</xref>]. However, LLMs introduce additional challenges, including the propagation of embedded biases [<xref ref-type="bibr" rid="ref167">167</xref>], privacy breaches [<xref ref-type="bibr" rid="ref168">168</xref>], model obsolescence and drift [<xref ref-type="bibr" rid="ref168">168</xref>], hallucination and confidently stated falsehoods [<xref ref-type="bibr" rid="ref169">169</xref>,<xref ref-type="bibr" rid="ref170">170</xref>], and substantial computational and environmental costs. These shortcomings can be detrimental to clinical practice, for example, by systematically underrecommending investigations, procedures, or treatments for underrepresented patient groups. Therefore, research on LLMs should also focus on addressing these ethical concerns in addition to technical performance and generalizability.</p><p>Model-centric privacy-preserving approaches, such as federated learning, where models are trained locally and aggregated without sharing raw data [<xref ref-type="bibr" rid="ref171">171</xref>], offer a potential pathway toward multi-institutional collaboration without direct data transfer. However, practical deployment remains challenging, requiring compatible infrastructure, sustained institutional partnerships, and strategies to manage data heterogeneity and site imbalance, which can bias global models toward dominant contributors and degrade performance for underrepresented populations [<xref ref-type="bibr" rid="ref172">172</xref>]. 
Related techniques, such as differential privacy, may further reduce reidentification risk but introduce trade-offs between privacy protection and model utility that must be carefully managed [<xref ref-type="bibr" rid="ref173">173</xref>].</p><p>Beyond algorithmic solutions, structural and policy-level interventions are likely to be critical. National initiatives, such as those implemented in Denmark, where clinical notes are rigorously deidentified and made accessible within secure research environments [<xref ref-type="bibr" rid="ref69">69</xref>], demonstrate the feasibility of balancing privacy protection with research utility. Broader adoption of such frameworks, alongside clearer institutional agreements that permit sharing of rigorously deidentified clinical text and accompanying code, could substantially improve reproducibility and accelerate progress in oncology NLP. Furthermore, to support open science in oncology NLP, future studies should adopt more transparent reporting of data access conditions, and where feasible, publicly release the code, alongside clear governance mechanisms to balance reproducibility with patient privacy.</p></sec><sec id="s4-6"><title>Limitations of the Review</title><p>This review has several limitations. First, approximately half of the included studies analyzed clinical notes alongside more structured medical documents such as pathology or radiology reports. These document types differ substantially in linguistic complexity, with diagnostic reports often being more templated and semistructured compared to free-text clinical notes such as progress notes or discharge summaries. As a result, the NLP methods and challenges reported in such studies may not be fully representative of those encountered when analyzing highly unstructured clinical narratives.</p><p>Second, we were unable to determine the proportion of clinical notes versus other document types in each study, as this was rarely reported. 
While we distinguished document types where possible, inconsistent reporting limited further quantification of these documents. Consequently, our findings reflect the broader landscape of clinical text processing in oncology rather than exclusively characterizing NLP applied to highly unstructured clinical notes. Nonetheless, we provide a more faithful representation of pregenerative AI methodological choices and challenges associated with clinical notes, as all included studies incorporated clinical notes. In addition, we could not systematically compare model performance across studies due to substantial heterogeneity in corpora and NLP tasks.</p><p>Third, the predominance of studies authored by researchers from the United States (133/226, 58.8%), primarily using local datasets, may have introduced some geographical and system-level bias. Our findings are therefore more reflective of the US health care context, including workflows, documentation styles, clinical note structures, and data access provisions.</p><p>Finally, Cohen &#x03BA; for title or abstract screening (0.54) and full-text screening (0.58) indicated moderate interrater agreement. This primarily reflects challenges in operationalizing eligibility criteria. In particular, disagreement frequently arose from ambiguity in how studies described their textual data sources, as some authors used the term &#x201C;clinical notes&#x201D; broadly to refer to any textual medical document, including diagnostic reports. This was exacerbated by limited methodological detail in the abstract, making it difficult to determine whether clinical notes were included. Despite this, class-specific agreement for exclusions at title or abstract screening was high (97.9%), while agreement for included studies improved substantially at full-text screening (86.3%) once detailed information was available. 
The moderate &#x03BA; values could therefore be partly attributed to class imbalance inherent to evidence synthesis, as most records are excluded at the title or abstract screening stage, and &#x03BA; adjusts for agreement expected by chance.</p></sec><sec id="s4-7"><title>Conclusions</title><p>This review establishes a comprehensive pregenerative AI baseline for NLP applied to clinical notes in oncology. Over the past decade, research volume increased substantially, and methods evolved from rule-based approaches to hybrid architectures incorporating rules and neural networks, including PLMs. However, most studies focused on IE rather than diagnosis or prognostication, relied on small single-institution datasets, and lacked external validation. While several systems demonstrated superior performance compared to current practice in research settings, significant barriers to clinical deployment remain, including limited generalizability, poor reproducibility, and restricted data access. Emerging generative AI approaches will need to address these barriers, as well as broader ethical challenges, to enable the translation of NLP systems into clinical settings for real-world impact.</p></sec></sec></body><back><ack><p>The authors thank Paula Funnell (Academic Skills and Liaison Librarian, Faculty of Medicine and Dentistry, Queen Mary University of London, Whitechapel Campus) for her assistance in developing the search strategy. The authors used ChatGPT (a generative artificial intelligence tool developed by OpenAI) to refine the Python code used to plot figures and to edit selected sections of the manuscript to improve grammar, sentence structure, and brevity. All outputs (code and text) were checked and, where necessary, revised by the authors.</p></ack><notes><sec><title>Funding</title><p>This study was conducted without any funding. 
However, the first author (ABK) completed this study as part of his PhD funded by the Wellcome Trust through the Health Data in Practice Doctoral Training Programme at Queen Mary University of London (grant 218584/Z/19/Z).</p></sec><sec><title>Data Availability</title><p>All data generated and analyzed during this study are included in this published paper as <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p></sec></notes><fn-group><fn fn-type="con"><p>ABK conceptualized and designed the study under the supervision of GF. KL, HRAE, FMW, and CC reviewed the study methodology. ABK performed the database searches and reference retrieval. ABK and HRAE completed the title or abstract screening, full-text screening, and data extraction, and analyzed and interpreted the data. ABK drafted the manuscript, and GF, HRAE, KL, FMW, and CC reviewed the draft. ABK revised the manuscript. All authors read and approved the final manuscript.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">BERT</term><def><p>Bidirectional Encoder Representations from Transformers</p></def></def-item><def-item><term id="abb3">CNN</term><def><p>convolutional neural network</p></def></def-item><def-item><term id="abb4">EHR</term><def><p>electronic health record</p></def></def-item><def-item><term id="abb5">ICD</term><def><p>International Classification of Diseases</p></def></def-item><def-item><term id="abb6">IE</term><def><p>information extraction</p></def></def-item><def-item><term id="abb7">LLM</term><def><p>large language model</p></def></def-item><def-item><term id="abb8">NLP</term><def><p>natural language processing</p></def></def-item><def-item><term id="abb9">PLM</term><def><p>pretrained language model</p></def></def-item><def-item><term id="abb10">PRISMA-ScR</term><def><p>Preferred 
Reporting Items for Systematic Reviews and Meta-Analyses Extension for Scoping Reviews</p></def></def-item><def-item><term id="abb11">RNN</term><def><p>recurrent neural network</p></def></def-item><def-item><term id="abb12">UMLS</term><def><p>Unified Medical Language System</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="web"><article-title>GLOBOCAN 2020: new global cancer data</article-title><source>UICC</source><access-date>2023-12-19</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.uicc.org/news/globocan-2020-new-global-cancer-data">https://www.uicc.org/news/globocan-2020-new-global-cancer-data</ext-link></comment></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="web"><article-title>Worldwide cancer incidence statistics</article-title><source>Cancer Research UK</source><access-date>2023-12-19</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cancerresearchuk.org/health-professional/cancer-statistics/worldwide-cancer/incidence#heading-One">https://www.cancerresearchuk.org/health-professional/cancer-statistics/worldwide-cancer/incidence#heading-One</ext-link></comment></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kim</surname><given-names>E</given-names> </name><name name-style="western"><surname>Rubinstein</surname><given-names>SM</given-names> </name><name name-style="western"><surname>Nead</surname><given-names>KT</given-names> </name><name name-style="western"><surname>Wojcieszynski</surname><given-names>AP</given-names> </name><name name-style="western"><surname>Gabriel</surname><given-names>PE</given-names> </name><name name-style="western"><surname>Warner</surname><given-names>JL</given-names> </name></person-group><article-title>The evolving use of electronic health 
records (EHR) for research</article-title><source>Semin Radiat Oncol</source><year>2019</year><month>10</month><volume>29</volume><issue>4</issue><fpage>354</fpage><lpage>361</lpage><pub-id pub-id-type="doi">10.1016/j.semradonc.2019.05.010</pub-id><pub-id pub-id-type="medline">31472738</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="web"><article-title>Structured vs unstructured data in healthcare</article-title><source>HealthTech</source><access-date>2025-01-07</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://healthtechmagazine.net/article/2023/05/structured-vs-unstructured-data-in-healthcare-perfcon#:~:text=Bring%20order%20to%20unstructured%20data,two%20dozen%20ICD-10%20codes">https://healthtechmagazine.net/article/2023/05/structured-vs-unstructured-data-in-healthcare-perfcon#:~:text=Bring%20order%20to%20unstructured%20data,two%20dozen%20ICD-10%20codes</ext-link></comment></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tayefi</surname><given-names>M</given-names> </name><name name-style="western"><surname>Ngo</surname><given-names>P</given-names> </name><name name-style="western"><surname>Chomutare</surname><given-names>T</given-names> </name><etal/></person-group><article-title>Challenges and opportunities beyond structured data in analysis of electronic health records</article-title><source>WIREs Comput Stats</source><year>2021</year><month>11</month><volume>13</volume><issue>6</issue><pub-id pub-id-type="doi">10.1002/wics.1549</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Meystre</surname><given-names>SM</given-names> </name><name name-style="western"><surname>Savova</surname><given-names>GK</given-names> </name><name 
name-style="western"><surname>Kipper-Schuler</surname><given-names>KC</given-names> </name><name name-style="western"><surname>Hurdle</surname><given-names>JF</given-names> </name></person-group><article-title>Extracting information from textual documents in the electronic health record: a review of recent research</article-title><source>Yearb Med Inform</source><year>2008</year><volume>17</volume><fpage>128</fpage><lpage>144</lpage><pub-id pub-id-type="doi">10.1055/s-0038-1638592</pub-id><pub-id pub-id-type="medline">18660887</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Perera</surname><given-names>S</given-names> </name><name name-style="western"><surname>Sheth</surname><given-names>A</given-names> </name><name name-style="western"><surname>Thirunarayan</surname><given-names>K</given-names> </name><etal/></person-group><article-title>Challenges in understanding clinical notes: why NLP engines fall short and where background knowledge can help</article-title><conf-name>International Conference on Information and Knowledge Management, Proceedings</conf-name><conf-date>Nov 3-7, 2013</conf-date><pub-id pub-id-type="doi">10.1145/2512410.2512427</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Madan</surname><given-names>S</given-names> </name><name name-style="western"><surname>Lentzen</surname><given-names>M</given-names> </name><name name-style="western"><surname>Brandt</surname><given-names>J</given-names> </name><name name-style="western"><surname>Rueckert</surname><given-names>D</given-names> </name><name name-style="western"><surname>Hofmann-Apitius</surname><given-names>M</given-names> </name><name name-style="western"><surname>Fr&#x00F6;hlich</surname><given-names>H</given-names> 
</name></person-group><article-title>Transformer models in biomedicine</article-title><source>BMC Med Inform Decis Mak</source><year>2024</year><month>07</month><day>29</day><volume>24</volume><issue>1</issue><fpage>214</fpage><pub-id pub-id-type="doi">10.1186/s12911-024-02600-5</pub-id><pub-id pub-id-type="medline">39075407</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Klotzman</surname><given-names>V</given-names> </name><etal/></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Kunze</surname><given-names>H</given-names> </name><name name-style="western"><surname>Torre</surname><given-names>D</given-names> </name><name name-style="western"><surname>Riccoboni</surname><given-names>A</given-names> </name></person-group><article-title>The difficulties of clinical NLP</article-title><source>Engineering Mathematics and Artificial Intelligence: Foundations, Methods, and Applications</source><year>2023</year><publisher-name>CRC Press</publisher-name><fpage>413</fpage><lpage>423</lpage><pub-id pub-id-type="doi">10.1201/9781003283980-17</pub-id><pub-id pub-id-type="other">9781032255675</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Devlin</surname><given-names>J</given-names> </name><name name-style="western"><surname>Chang</surname><given-names>MW</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>K</given-names> </name><name name-style="western"><surname>Toutanova</surname><given-names>K</given-names> </name></person-group><article-title>BERT: pre-training of deep bidirectional transformers for language understanding</article-title><conf-name>NAACL HLT 2019&#x2014;2019 Conference of the North American Chapter of the Association for Computational 
Linguistics: Human Language Technologies&#x2014;Proceedings of the Conference</conf-name><conf-date>Jun 2-7, 2019</conf-date><pub-id pub-id-type="doi">10.18653/v1/N19-1423</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Brown</surname><given-names>TB</given-names> </name><name name-style="western"><surname>Mann</surname><given-names>B</given-names> </name><name name-style="western"><surname>Ryder</surname><given-names>N</given-names> </name><etal/></person-group><article-title>Language models are few-shot learners</article-title><access-date>2026-04-24</access-date><conf-name>34th Conference on Neural Information Processing Systems (NeurIPS 2020)</conf-name><conf-date>Dec 8-10, 2020</conf-date><comment><ext-link ext-link-type="uri" xlink:href="https://proceedings.neurips.cc/paper/2020/file/1457c0d6bfcb4967418bfb8ac142f64a-Paper.pdf">https://proceedings.neurips.cc/paper/2020/file/1457c0d6bfcb4967418bfb8ac142f64a-Paper.pdf</ext-link></comment></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Touvron</surname><given-names>H</given-names> </name><name name-style="western"><surname>Lavril</surname><given-names>T</given-names> </name><name name-style="western"><surname>Izacard</surname><given-names>G</given-names> </name><etal/></person-group><article-title>LLaMA: open and efficient foundation language models</article-title><comment>Preprint posted online on Feb 27, 2023</comment><pub-id pub-id-type="doi">10.48550/arXiv.2302.13971</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tariq</surname><given-names>A</given-names> </name><name 
name-style="western"><surname>Sikha</surname><given-names>M</given-names> </name><name name-style="western"><surname>Kurian</surname><given-names>AW</given-names> </name><etal/></person-group><article-title>Open-source hybrid large language model integrated system for extraction of breast cancer treatment pathway from free-text clinical notes</article-title><source>JCO Clin Cancer Inform</source><year>2025</year><month>06</month><volume>9</volume><issue>9</issue><fpage>e2500002</fpage><pub-id pub-id-type="doi">10.1200/CCI-25-00002</pub-id><pub-id pub-id-type="medline">40577660</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gu</surname><given-names>Z</given-names> </name><name name-style="western"><surname>He</surname><given-names>L</given-names> </name><name name-style="western"><surname>Naeem</surname><given-names>A</given-names> </name><etal/></person-group><article-title>SBDH-Reader: a large language model-powered method for extracting social and behavioral determinants of health from clinical notes</article-title><source>J Am Med Inform Assoc</source><year>2025</year><month>10</month><day>1</day><volume>32</volume><issue>10</issue><fpage>1570</fpage><lpage>1580</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocaf124</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kaster</surname><given-names>L</given-names> </name><name name-style="western"><surname>Hillis</surname><given-names>E</given-names> </name><name name-style="western"><surname>Oh</surname><given-names>IY</given-names> </name><etal/></person-group><article-title>Comparison of rule- and large language model-based phenotype extraction from clinical notes for neurofibromatosis type 1</article-title><source>J Am Med Inform 
Assoc</source><year>2025</year><month>11</month><day>1</day><volume>32</volume><issue>11</issue><fpage>1663</fpage><lpage>1673</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocaf155</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>D</given-names> </name><name name-style="western"><surname>Alnassar</surname><given-names>SA</given-names> </name><name name-style="western"><surname>Avison</surname><given-names>KE</given-names> </name><name name-style="western"><surname>Huang</surname><given-names>RS</given-names> </name><name name-style="western"><surname>Raman</surname><given-names>S</given-names> </name></person-group><article-title>Large language model applications for health information extraction in oncology: scoping review</article-title><source>JMIR Cancer</source><year>2025</year><month>03</month><day>28</day><volume>11</volume><fpage>e65984</fpage><pub-id pub-id-type="doi">10.2196/65984</pub-id><pub-id pub-id-type="medline">40153782</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhong</surname><given-names>R</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>S</given-names> </name><name name-style="western"><surname>Li</surname><given-names>Z</given-names> </name><etal/></person-group><article-title>Large language models in lung cancer: systematic review</article-title><source>J Med Internet Res</source><year>2025</year><month>09</month><day>30</day><volume>27</volume><fpage>e74177</fpage><pub-id pub-id-type="doi">10.2196/74177</pub-id><pub-id pub-id-type="medline">41026980</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Hao</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Qiu</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Holmes</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Large language model integrations in cancer decision-making: a systematic review and meta-analysis</article-title><source>NPJ Digit Med</source><year>2025</year><month>07</month><day>17</day><volume>8</volume><issue>1</issue><fpage>450</fpage><pub-id pub-id-type="doi">10.1038/s41746-025-01824-7</pub-id><pub-id pub-id-type="medline">40676129</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>L</given-names> </name><name name-style="western"><surname>Fu</surname><given-names>S</given-names> </name><name name-style="western"><surname>Wen</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Assessment of electronic health record for cancer research and patient care through a scoping review of cancer natural language processing</article-title><source>JCO Clin Cancer Inform</source><year>2022</year><month>07</month><volume>6</volume><fpage>e2200006</fpage><pub-id pub-id-type="doi">10.1200/CCI.22.00006</pub-id><pub-id pub-id-type="medline">35917480</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>C</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Weng</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>B</given-names> </name><name name-style="western"><surname>Li</surname><given-names>Z</given-names> 
</name></person-group><article-title>Natural language processing applications for computer-aided diagnosis in oncology</article-title><source>Diagnostics (Basel)</source><year>2023</year><month>01</month><day>12</day><volume>13</volume><issue>2</issue><fpage>286</fpage><pub-id pub-id-type="doi">10.3390/diagnostics13020286</pub-id><pub-id pub-id-type="medline">36673096</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gholipour</surname><given-names>M</given-names> </name><name name-style="western"><surname>Khajouei</surname><given-names>R</given-names> </name><name name-style="western"><surname>Amiri</surname><given-names>P</given-names> </name><name name-style="western"><surname>Hajesmaeel Gohari</surname><given-names>S</given-names> </name><name name-style="western"><surname>Ahmadian</surname><given-names>L</given-names> </name></person-group><article-title>Extracting cancer concepts from clinical notes using natural language processing: a systematic review</article-title><source>BMC Bioinformatics</source><year>2023</year><month>10</month><day>29</day><volume>24</volume><issue>1</issue><fpage>405</fpage><pub-id pub-id-type="doi">10.1186/s12859-023-05480-0</pub-id><pub-id pub-id-type="medline">37898795</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sangariyavanich</surname><given-names>E</given-names> </name><name name-style="western"><surname>Ponthongmak</surname><given-names>W</given-names> </name><name name-style="western"><surname>Tansawet</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Systematic review of natural language processing for recurrent cancer detection from electronic medical records</article-title><source>Inform Med 
Unlocked</source><year>2023</year><volume>41</volume><fpage>101326</fpage><pub-id pub-id-type="doi">10.1016/j.imu.2023.101326</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>L</given-names> </name><name name-style="western"><surname>Rastegar-Mojarad</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Clinical information extraction applications: a literature review</article-title><source>J Biomed Inform</source><year>2018</year><month>01</month><volume>77</volume><fpage>34</fpage><lpage>49</lpage><pub-id pub-id-type="doi">10.1016/j.jbi.2017.11.011</pub-id><pub-id pub-id-type="medline">29162496</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sim</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Huang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Horan</surname><given-names>MR</given-names> </name><etal/></person-group><article-title>Natural language processing with machine learning methods to analyze unstructured patient-reported outcomes derived from electronic health records: a systematic review</article-title><source>Artif Intell Med</source><year>2023</year><month>12</month><volume>146</volume><fpage>102701</fpage><pub-id pub-id-type="doi">10.1016/j.artmed.2023.102701</pub-id><pub-id pub-id-type="medline">38042599</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sheikhalishahi</surname><given-names>S</given-names> </name><name 
name-style="western"><surname>Miotto</surname><given-names>R</given-names> </name><name name-style="western"><surname>Dudley</surname><given-names>JT</given-names> </name><name name-style="western"><surname>Lavelli</surname><given-names>A</given-names> </name><name name-style="western"><surname>Rinaldi</surname><given-names>F</given-names> </name><name name-style="western"><surname>Osmani</surname><given-names>V</given-names> </name></person-group><article-title>Natural language processing of clinical notes on chronic diseases: systematic review</article-title><source>JMIR Med Inform</source><year>2019</year><month>04</month><day>27</day><volume>7</volume><issue>2</issue><fpage>e12239</fpage><pub-id pub-id-type="doi">10.2196/12239</pub-id><pub-id pub-id-type="medline">31066697</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tricco</surname><given-names>AC</given-names> </name><name name-style="western"><surname>Lillie</surname><given-names>E</given-names> </name><name name-style="western"><surname>Zarin</surname><given-names>W</given-names> </name><etal/></person-group><article-title>PRISMA Extension for Scoping Reviews (PRISMA-ScR): checklist and explanation</article-title><source>Ann Intern Med</source><year>2018</year><month>10</month><day>2</day><volume>169</volume><issue>7</issue><fpage>467</fpage><lpage>473</lpage><pub-id pub-id-type="doi">10.7326/M18-0850</pub-id><pub-id pub-id-type="medline">30178033</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Munn</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Peters</surname><given-names>MDJ</given-names> </name><name name-style="western"><surname>Stern</surname><given-names>C</given-names> </name><name 
name-style="western"><surname>Tufanaru</surname><given-names>C</given-names> </name><name name-style="western"><surname>McArthur</surname><given-names>A</given-names> </name><name name-style="western"><surname>Aromataris</surname><given-names>E</given-names> </name></person-group><article-title>Systematic review or scoping review? Guidance for authors when choosing between a systematic or scoping review approach</article-title><source>BMC Med Res Methodol</source><year>2018</year><month>11</month><day>19</day><volume>18</volume><issue>1</issue><fpage>143</fpage><pub-id pub-id-type="doi">10.1186/s12874-018-0611-x</pub-id><pub-id pub-id-type="medline">30453902</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sultan</surname><given-names>I</given-names> </name><name name-style="western"><surname>Al-Abdallat</surname><given-names>H</given-names> </name><name name-style="western"><surname>Alnajjar</surname><given-names>Z</given-names> </name><etal/></person-group><article-title>Using ChatGPT to predict cancer predisposition genes: a promising tool for pediatric oncologists</article-title><source>Cureus</source><year>2023</year><month>10</month><volume>15</volume><issue>10</issue><fpage>e47594</fpage><pub-id pub-id-type="doi">10.7759/cureus.47594</pub-id><pub-id pub-id-type="medline">38021917</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>McGowan</surname><given-names>M</given-names> </name><name name-style="western"><surname>Correia Martins</surname><given-names>F</given-names> </name><name name-style="western"><surname>Keen</surname><given-names>JL</given-names> </name><etal/></person-group><article-title>Can natural language processing be effectively applied for audit data analysis in gynaecological oncology at a 
UK cancer centre?</article-title><source>Int J Med Inform</source><year>2024</year><month>02</month><volume>182</volume><fpage>105306</fpage><pub-id pub-id-type="doi">10.1016/j.ijmedinf.2023.105306</pub-id><pub-id pub-id-type="medline">38065003</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Solarte-Pabon</surname><given-names>O</given-names> </name><name name-style="western"><surname>Blazquez-Herranz</surname><given-names>A</given-names> </name><name name-style="western"><surname>Torrente</surname><given-names>M</given-names> </name><name name-style="western"><surname>Rodriguez-Gonzalez</surname><given-names>A</given-names> </name><name name-style="western"><surname>Provencio</surname><given-names>M</given-names> </name><name name-style="western"><surname>Menasalvas</surname><given-names>E</given-names> </name></person-group><article-title>Extracting cancer treatments from clinical text written in Spanish: a deep learning approach</article-title><year>2021</year><conf-name>2021 IEEE 8th International Conference on Data Science and Advanced Analytics (DSAA)</conf-name><conf-date>Oct 6-9, 2021</conf-date><pub-id pub-id-type="doi">10.1109/DSAA53316.2021.9564137</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Paolo</surname><given-names>D</given-names> </name><name name-style="western"><surname>Bria</surname><given-names>A</given-names> </name><name name-style="western"><surname>Greco</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Named entity recognition in Italian lung cancer clinical reports using transformers</article-title><year>2023</year><conf-name>2023 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)</conf-name><conf-date>Dec 5-8, 
2023</conf-date><conf-loc>Istanbul, Turkiye</conf-loc><fpage>2023</fpage><pub-id pub-id-type="doi">10.1109/BIBM58861.2023.10385778</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>Q</given-names> </name><etal/></person-group><article-title>Extracting comprehensive clinical information for breast cancer using deep learning methods</article-title><source>Int J Med Inform</source><year>2019</year><month>12</month><volume>132</volume><fpage>103985</fpage><pub-id pub-id-type="doi">10.1016/j.ijmedinf.2019.103985</pub-id><pub-id pub-id-type="medline">31627032</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Rivera-Zavala</surname><given-names>R</given-names> </name><name name-style="western"><surname>Martinez</surname><given-names>P</given-names> </name></person-group><article-title>Deep neural model with contextualized-word embeddings for named entity recognition in Spanish clinical text</article-title><conf-name>Proceedings of the Iberian Languages Evaluation Forum (IberLEF 2020) CEUR Workshop Proceedings (CEUR-WS.org)</conf-name><conf-date>Sep 22-24, 2020</conf-date></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zelina</surname><given-names>P</given-names> </name><name name-style="western"><surname>Hal&#x00E1;mkov&#x00E1;</surname><given-names>J</given-names> </name><name name-style="western"><surname>Nov&#x00E1;&#x010D;ek</surname><given-names>V</given-names> 
</name></person-group><article-title>Extraction, labeling, clustering, and semantic mapping of segments from clinical notes</article-title><source>IEEE Trans Nanobioscience</source><year>2023</year><volume>22</volume><issue>4</issue><fpage>781</fpage><lpage>788</lpage><pub-id pub-id-type="doi">10.1109/TNB.2023.3275195</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Araki</surname><given-names>K</given-names> </name><name name-style="western"><surname>Matsumoto</surname><given-names>N</given-names> </name><name name-style="western"><surname>Togo</surname><given-names>K</given-names> </name><etal/></person-group><article-title>Developing artificial intelligence models for extracting oncologic outcomes from Japanese electronic health records</article-title><source>Adv Ther</source><year>2023</year><month>03</month><volume>40</volume><issue>3</issue><fpage>934</fpage><lpage>950</lpage><pub-id pub-id-type="doi">10.1007/s12325-022-02397-7</pub-id><pub-id pub-id-type="medline">36547809</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Garc&#x00ED;a-Pablos</surname><given-names>A</given-names> </name><name name-style="western"><surname>Perez</surname><given-names>N</given-names> </name></person-group><article-title>Vicomtech at CANTEMIST 2020</article-title><conf-name>Proceedings of the Iberian Languages Evaluation Forum (IberLEF 2020) CEUR Workshop Proceedings (CEUR-WS.org)</conf-name><conf-date>Sep 22-24, 2020</conf-date></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Karlsson</surname><given-names>A</given-names> </name><name 
name-style="western"><surname>Ellonen</surname><given-names>A</given-names> </name><name name-style="western"><surname>Irjala</surname><given-names>H</given-names> </name><etal/></person-group><article-title>Impact of deep learning-determined smoking status on mortality of cancer patients: never too late to quit</article-title><source>ESMO Open</source><year>2021</year><month>06</month><volume>6</volume><issue>3</issue><fpage>100175</fpage><pub-id pub-id-type="doi">10.1016/j.esmoop.2021.100175</pub-id><pub-id pub-id-type="medline">34091262</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Chapman</surname><given-names>K</given-names> </name><name name-style="western"><surname>Neumann</surname><given-names>G</given-names> </name></person-group><article-title>Automatic ICD code classification with label description attention mechanism</article-title><conf-name>Proceedings of the Iberian Languages Evaluation Forum (IberLEF 2020) CEUR Workshop Proceedings (CEUR-WS.org)</conf-name><conf-date>Sep 22-24, 2020</conf-date></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Solarte-Pab&#x00F3;n</surname><given-names>O</given-names> </name><name name-style="western"><surname>Montenegro</surname><given-names>O</given-names> </name><name name-style="western"><surname>Garc&#x00ED;a-Barrag&#x00E1;n</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Transformers for extracting breast cancer information from Spanish clinical narratives</article-title><source>Artif Intell Med</source><year>2023</year><month>09</month><volume>143</volume><fpage>102625</fpage><pub-id pub-id-type="doi">10.1016/j.artmed.2023.102625</pub-id><pub-id pub-id-type="medline">37673566</pub-id></nlm-citation></ref><ref 
id="ref40"><label>40</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Osborne</surname><given-names>JD</given-names> </name><name name-style="western"><surname>O&#x2019;Leary</surname><given-names>T</given-names> </name><name name-style="western"><surname>Del Monte</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Identification of cancer entities in clinical text combining transformers with dictionary features</article-title><conf-name>Proceedings of the Iberian Languages Evaluation Forum (IberLEF 2020) CEUR Workshop Proceedings (CEUR-WS.org)</conf-name><conf-date>Sep 22-24, 2020</conf-date></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Solarte Pab&#x00F3;n</surname><given-names>O</given-names> </name><name name-style="western"><surname>Montenegro</surname><given-names>O</given-names> </name><name name-style="western"><surname>Torrente</surname><given-names>M</given-names> </name><name name-style="western"><surname>Rodr&#x00ED;guez Gonz&#x00E1;lez</surname><given-names>A</given-names> </name><name name-style="western"><surname>Provencio</surname><given-names>M</given-names> </name><name name-style="western"><surname>Menasalvas</surname><given-names>E</given-names> </name></person-group><article-title>Negation and uncertainty detection in clinical texts written in Spanish: a deep learning-based approach</article-title><source>PeerJ Comput Sci</source><year>2022</year><volume>8</volume><fpage>e913</fpage><pub-id pub-id-type="doi">10.7717/peerj-cs.913</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Banerjee</surname><given-names>I</given-names> </name><name 
name-style="western"><surname>Gensheimer</surname><given-names>MF</given-names> </name><name name-style="western"><surname>Wood</surname><given-names>DJ</given-names> </name><etal/></person-group><article-title>Probabilistic prognostic estimates of survival in metastatic cancer patients (PPES-Met) utilizing free-text clinical narratives</article-title><source>Sci Rep</source><year>2018</year><month>07</month><day>3</day><volume>8</volume><issue>1</issue><fpage>10037</fpage><pub-id pub-id-type="doi">10.1038/s41598-018-27946-5</pub-id><pub-id pub-id-type="medline">29968730</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gray</surname><given-names>SW</given-names> </name><name name-style="western"><surname>Ottesen</surname><given-names>RA</given-names> </name><name name-style="western"><surname>Currey</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Leveraging an informatics approach to identify an unmet clinical need for BRCA1/2 testing among patients with ovarian cancer</article-title><source>JCO Clin Cancer Inform</source><year>2022</year><month>09</month><volume>6</volume><fpage>e2200034</fpage><pub-id pub-id-type="doi">10.1200/CCI.22.00034</pub-id><pub-id pub-id-type="medline">36049148</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kaka</surname><given-names>H</given-names> </name><name name-style="western"><surname>Michalopoulos</surname><given-names>G</given-names> </name><name name-style="western"><surname>Subendran</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Pretrained neural networks accurately identify cancer recurrence in medical record</article-title><source>Stud Health Technol 
Inform</source><year>2022</year><month>05</month><day>25</day><volume>294</volume><fpage>93</fpage><lpage>97</lpage><pub-id pub-id-type="doi">10.3233/SHTI220403</pub-id><pub-id pub-id-type="medline">35612023</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Banerjee</surname><given-names>I</given-names> </name><name name-style="western"><surname>Bozkurt</surname><given-names>S</given-names> </name><name name-style="western"><surname>Caswell-Jin</surname><given-names>JL</given-names> </name><name name-style="western"><surname>Kurian</surname><given-names>AW</given-names> </name><name name-style="western"><surname>Rubin</surname><given-names>DL</given-names> </name></person-group><article-title>Natural language processing approaches to detect the timeline of metastatic recurrence of breast cancer</article-title><source>JCO Clin Cancer Inform</source><year>2019</year><month>10</month><volume>3</volume><fpage>1</fpage><lpage>12</lpage><pub-id pub-id-type="doi">10.1200/CCI.19.00034</pub-id><pub-id pub-id-type="medline">31584836</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Velupillai</surname><given-names>S</given-names> </name><name name-style="western"><surname>Mowery</surname><given-names>DL</given-names> </name><name name-style="western"><surname>Abdelrahman</surname><given-names>S</given-names> </name><name name-style="western"><surname>Christensen</surname><given-names>L</given-names> </name><name name-style="western"><surname>Chapman</surname><given-names>W</given-names> </name></person-group><article-title>BluLab: temporal information extraction for the 2015 Clinical TempEval challenge</article-title><conf-name>Proceedings of the 9th International Workshop on Semantic Evaluation (SemEval 
2015)</conf-name><conf-date>Jun 4-5, 2015</conf-date><pub-id pub-id-type="doi">10.18653/v1/S15-2137</pub-id></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Miller</surname><given-names>T</given-names> </name><name name-style="western"><surname>Laparra</surname><given-names>E</given-names> </name><name name-style="western"><surname>Bethard</surname><given-names>S</given-names> </name><etal/></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Ben-David</surname><given-names>E</given-names> </name><name name-style="western"><surname>Cohen</surname><given-names>S</given-names> </name><name name-style="western"><surname>McDonald</surname><given-names>R</given-names> </name></person-group><article-title>Domain adaptation in practice: lessons from a real-world information extraction pipeline</article-title><conf-name>Proceedings of the Second Workshop on Domain Adaptation for NLP</conf-name><conf-date>Aug 1, 2019</conf-date></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hong</surname><given-names>J</given-names> </name><name name-style="western"><surname>Davoudi</surname><given-names>A</given-names> </name><name name-style="western"><surname>Yu</surname><given-names>S</given-names> </name><name name-style="western"><surname>Mowery</surname><given-names>DL</given-names> </name></person-group><article-title>Annotation and extraction of age and temporally-related events from clinical histories</article-title><source>BMC Med Inform Decis Mak</source><year>2020</year><month>12</month><day>30</day><volume>20</volume><issue>Suppl 11</issue><fpage>338</fpage><pub-id pub-id-type="doi">10.1186/s12911-020-01333-5</pub-id><pub-id pub-id-type="medline">33380319</pub-id></nlm-citation></ref><ref 
id="ref49"><label>49</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Li</surname><given-names>C</given-names> </name><name name-style="western"><surname>Long</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>X</given-names> </name></person-group><article-title>A system for automatically extracting clinical events with temporal information</article-title><source>BMC Med Inform Decis Mak</source><year>2020</year><month>12</month><volume>20</volume><issue>1</issue><fpage>1</fpage><lpage>13</lpage><pub-id pub-id-type="doi">10.1186/s12911-020-01208-9</pub-id></nlm-citation></ref><ref id="ref50"><label>50</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bitterman</surname><given-names>DS</given-names> </name><name name-style="western"><surname>Goldner</surname><given-names>E</given-names> </name><name name-style="western"><surname>Finan</surname><given-names>S</given-names> </name><etal/></person-group><article-title>An end-to-end natural language processing system for automatically extracting radiation therapy events from clinical texts</article-title><source>Int J Radiat Oncol Biol Phys</source><year>2023</year><month>09</month><day>1</day><volume>117</volume><issue>1</issue><fpage>262</fpage><lpage>273</lpage><pub-id pub-id-type="doi">10.1016/j.ijrobp.2023.03.055</pub-id><pub-id pub-id-type="medline">36990288</pub-id></nlm-citation></ref><ref id="ref51"><label>51</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Adamson</surname><given-names>B</given-names> </name><name name-style="western"><surname>Waskom</surname><given-names>M</given-names> </name><name 
name-style="western"><surname>Blarre</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Approach to machine learning for extraction of real-world data variables from electronic health records</article-title><source>Front Pharmacol</source><year>2023</year><volume>14</volume><fpage>1180962</fpage><pub-id pub-id-type="doi">10.3389/fphar.2023.1180962</pub-id><pub-id pub-id-type="medline">37781703</pub-id></nlm-citation></ref><ref id="ref52"><label>52</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Raghavan</surname><given-names>P</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>JL</given-names> </name><name name-style="western"><surname>Fosler-Lussier</surname><given-names>E</given-names> </name><name name-style="western"><surname>Lai</surname><given-names>AM</given-names> </name></person-group><article-title>How essential are unstructured clinical narratives and information fusion to clinical trial recruitment?</article-title><source>AMIA Jt Summits Transl Sci Proc</source><year>2014</year><volume>2014</volume><issue>218</issue><fpage>218</fpage><lpage>223</lpage><pub-id pub-id-type="medline">25717416</pub-id></nlm-citation></ref><ref id="ref53"><label>53</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Solarte Pab&#x00F3;n</surname><given-names>O</given-names> </name><name name-style="western"><surname>Torrente</surname><given-names>M</given-names> </name><name name-style="western"><surname>Provencio</surname><given-names>M</given-names> </name><name name-style="western"><surname>Rodr&#x00ED;guez-Gonzalez</surname><given-names>A</given-names> </name><name name-style="western"><surname>Menasalvas</surname><given-names>E</given-names> </name></person-group><article-title>Integrating speculation detection and deep learning to extract lung cancer diagnosis from 
clinical notes</article-title><source>Appl Sci (Basel)</source><year>2021</year><volume>11</volume><issue>2</issue><fpage>865</fpage><pub-id pub-id-type="doi">10.3390/app11020865</pub-id></nlm-citation></ref><ref id="ref54"><label>54</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Guin</surname><given-names>S</given-names> </name><name name-style="western"><surname>Jun</surname><given-names>T</given-names> </name><name name-style="western"><surname>Patel</surname><given-names>VG</given-names> </name><etal/></person-group><article-title>Extraction of treatment information from electronic health records and evaluation of testosterone recovery in patients with prostate cancer</article-title><source>JCO Clin Cancer Inform</source><year>2022</year><month>06</month><volume>6</volume><fpage>e2200010</fpage><pub-id pub-id-type="doi">10.1200/CCI.22.00010</pub-id><pub-id pub-id-type="medline">35696627</pub-id></nlm-citation></ref><ref id="ref55"><label>55</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Najafabadipour</surname><given-names>M</given-names> </name><name name-style="western"><surname>Zanin</surname><given-names>M</given-names> </name><name name-style="western"><surname>Rodr&#x00ED;guez-Gonz&#x00E1;lez</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Reconstructing the patient&#x2019;s natural history from electronic health records</article-title><source>Artif Intell Med</source><year>2020</year><month>05</month><volume>105</volume><fpage>101860</fpage><pub-id pub-id-type="doi">10.1016/j.artmed.2020.101860</pub-id><pub-id pub-id-type="medline">32505419</pub-id></nlm-citation></ref><ref id="ref56"><label>56</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>L</given-names> </name><name 
name-style="western"><surname>Wampfler</surname><given-names>J</given-names> </name><name name-style="western"><surname>Dispenzieri</surname><given-names>A</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>H</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>P</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>H</given-names> </name></person-group><article-title>Achievability to extract specific date information for cancer research</article-title><source>AMIA Annu Symp Proc</source><year>2019</year><volume>2019</volume><fpage>893</fpage><lpage>902</lpage><pub-id pub-id-type="medline">32308886</pub-id></nlm-citation></ref><ref id="ref57"><label>57</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fu</surname><given-names>JT</given-names> </name><name name-style="western"><surname>Sholle</surname><given-names>E</given-names> </name><name name-style="western"><surname>Krichevsky</surname><given-names>S</given-names> </name><name name-style="western"><surname>Scandura</surname><given-names>J</given-names> </name><name name-style="western"><surname>Campion</surname><given-names>TR</given-names> </name></person-group><article-title>Extracting and classifying diagnosis dates from clinical notes: a case study</article-title><source>J Biomed Inform</source><year>2020</year><month>10</month><volume>110</volume><fpage>103569</fpage><pub-id pub-id-type="doi">10.1016/j.jbi.2020.103569</pub-id><pub-id pub-id-type="medline">32949781</pub-id></nlm-citation></ref><ref id="ref58"><label>58</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Solarte-Pabon</surname><given-names>O</given-names> </name><name name-style="western"><surname>Torrente</surname><given-names>M</given-names> </name><name 
name-style="western"><surname>Rodriguez-Gonzalez</surname><given-names>A</given-names> </name><name name-style="western"><surname>Provencio</surname><given-names>M</given-names> </name><name name-style="western"><surname>Menasalvas</surname><given-names>E</given-names> </name><name name-style="western"><surname>Tunas</surname><given-names>JM</given-names> </name></person-group><article-title>Lung cancer diagnosis extraction from clinical notes written in Spanish</article-title><conf-name>2020 IEEE 33rd International Symposium on Computer-Based Medical Systems (CBMS)</conf-name><conf-date>Jul 28-30, 2020</conf-date><pub-id pub-id-type="doi">10.1109/CBMS49503.2020.00099</pub-id></nlm-citation></ref><ref id="ref59"><label>59</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>R</given-names> </name><name name-style="western"><surname>Jagannatha</surname><given-names>AN</given-names> </name><name name-style="western"><surname>Yu</surname><given-names>H</given-names> </name></person-group><article-title>A hybrid neural network model for joint prediction of presence and period assertions of medical events in clinical notes</article-title><source>AMIA Annu Symp Proc</source><year>2017</year><volume>2017</volume><fpage>1149</fpage><lpage>1158</lpage><pub-id pub-id-type="medline">29854183</pub-id></nlm-citation></ref><ref id="ref60"><label>60</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Palmer</surname><given-names>EL</given-names> </name><name name-style="western"><surname>Hassanpour</surname><given-names>S</given-names> </name><name name-style="western"><surname>Higgins</surname><given-names>J</given-names> </name><name name-style="western"><surname>Doherty</surname><given-names>JA</given-names> </name><name 
name-style="western"><surname>Onega</surname><given-names>T</given-names> </name></person-group><article-title>Building a tobacco user registry by extracting multiple smoking behaviors from clinical notes</article-title><source>BMC Med Inform Decis Mak</source><year>2019</year><month>07</month><day>25</day><volume>19</volume><issue>1</issue><fpage>141</fpage><pub-id pub-id-type="doi">10.1186/s12911-019-0863-3</pub-id><pub-id pub-id-type="medline">31340796</pub-id></nlm-citation></ref><ref id="ref61"><label>61</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yu</surname><given-names>S</given-names> </name><name name-style="western"><surname>Le</surname><given-names>A</given-names> </name><name name-style="western"><surname>Feld</surname><given-names>E</given-names> </name><etal/></person-group><article-title>A natural language processing-assisted extraction system for Gleason scores: development and usability study</article-title><source>JMIR Cancer</source><year>2021</year><month>07</month><day>2</day><volume>7</volume><issue>3</issue><fpage>e27970</fpage><pub-id pub-id-type="doi">10.2196/27970</pub-id><pub-id pub-id-type="medline">34255641</pub-id></nlm-citation></ref><ref id="ref62"><label>62</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Biron</surname><given-names>P</given-names> </name><name name-style="western"><surname>Metzger</surname><given-names>MH</given-names> </name><name name-style="western"><surname>Pezet</surname><given-names>C</given-names> </name><name name-style="western"><surname>Sebban</surname><given-names>C</given-names> </name><name name-style="western"><surname>Barthuet</surname><given-names>E</given-names> </name><name name-style="western"><surname>Durand</surname><given-names>T</given-names> </name></person-group><article-title>An information retrieval system for computerized patient records 
in the context of a daily hospital practice: the example of the L&#x00E9;on B&#x00E9;rard Cancer Center (France)</article-title><source>Appl Clin Inform</source><year>2014</year><volume>5</volume><issue>1</issue><fpage>191</fpage><lpage>205</lpage><pub-id pub-id-type="doi">10.4338/ACI-2013-08-CR-0065</pub-id><pub-id pub-id-type="medline">24734133</pub-id></nlm-citation></ref><ref id="ref63"><label>63</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Zhu</surname><given-names>W</given-names> </name><name name-style="western"><surname>Teh</surname><given-names>JB</given-names> </name><name name-style="western"><surname>Li</surname><given-names>H</given-names> </name><name name-style="western"><surname>Armenian</surname><given-names>SH</given-names> </name></person-group><article-title>Knowledge extraction of long-term complications from clinical narratives of blood cancer patients with HCT treatments</article-title><year>2018</year><month>08</month><day>15</day><conf-name>BCB &#x2019;18: Proceedings of the 2018 ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics</conf-name><conf-date>Aug 29 to Sep 1, 2018</conf-date><pub-id pub-id-type="doi">10.1145/3233547.3233635</pub-id></nlm-citation></ref><ref id="ref64"><label>64</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Osborne</surname><given-names>JD</given-names> </name><name name-style="western"><surname>Wyatt</surname><given-names>M</given-names> </name><name name-style="western"><surname>Westfall</surname><given-names>AO</given-names> </name><name name-style="western"><surname>Willig</surname><given-names>J</given-names> </name><name name-style="western"><surname>Bethard</surname><given-names>S</given-names> </name><name name-style="western"><surname>Gordon</surname><given-names>G</given-names> 
</name></person-group><article-title>Efficient identification of nationally mandated reportable cancer cases using natural language processing and machine learning</article-title><source>J Am Med Inform Assoc</source><year>2016</year><month>11</month><volume>23</volume><issue>6</issue><fpage>1077</fpage><lpage>1084</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocw006</pub-id><pub-id pub-id-type="medline">27026618</pub-id></nlm-citation></ref><ref id="ref65"><label>65</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cohen</surname><given-names>AB</given-names> </name><name name-style="western"><surname>Rosic</surname><given-names>A</given-names> </name><name name-style="western"><surname>Harrison</surname><given-names>K</given-names> </name><etal/></person-group><article-title>A natural language processing algorithm to improve completeness of ECOG performance status in real-world data</article-title><source>Appl Sci (Basel)</source><year>2023</year><volume>13</volume><issue>10</issue><fpage>6209</fpage><pub-id pub-id-type="doi">10.3390/app13106209</pub-id></nlm-citation></ref><ref id="ref66"><label>66</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tamang</surname><given-names>S</given-names> </name><name name-style="western"><surname>Patel</surname><given-names>MI</given-names> </name><name name-style="western"><surname>Blayney</surname><given-names>DW</given-names> </name><etal/></person-group><article-title>Detecting unplanned care from clinician notes in electronic health records</article-title><source>J Oncol Pract</source><year>2015</year><month>05</month><volume>11</volume><issue>3</issue><fpage>e313</fpage><lpage>e319</lpage><pub-id pub-id-type="doi">10.1200/JOP.2014.002741</pub-id><pub-id pub-id-type="medline">25980019</pub-id></nlm-citation></ref><ref id="ref67"><label>67</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Karimi</surname><given-names>YH</given-names> </name><name name-style="western"><surname>Blayney</surname><given-names>DW</given-names> </name><name name-style="western"><surname>Kurian</surname><given-names>AW</given-names> </name><etal/></person-group><article-title>Development and use of natural language processing for identification of distant cancer recurrence and sites of distant recurrence using unstructured electronic health record data</article-title><source>JCO Clin Cancer Inform</source><year>2021</year><month>04</month><volume>5</volume><issue>5</issue><fpage>469</fpage><lpage>478</lpage><pub-id pub-id-type="doi">10.1200/CCI.20.00165</pub-id><pub-id pub-id-type="medline">33929889</pub-id></nlm-citation></ref><ref id="ref68"><label>68</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hernandez-Boussard</surname><given-names>T</given-names> </name><name name-style="western"><surname>Tamang</surname><given-names>S</given-names> </name><name name-style="western"><surname>Blayney</surname><given-names>D</given-names> </name><name name-style="western"><surname>Brooks</surname><given-names>J</given-names> </name><name name-style="western"><surname>Shah</surname><given-names>N</given-names> </name></person-group><article-title>New paradigms for patient-centered outcomes research in electronic medical records: an example of detecting urinary incontinence following prostatectomy</article-title><source>EGEMS (Wash DC)</source><year>2016</year><volume>4</volume><issue>3</issue><fpage>1231</fpage><pub-id pub-id-type="doi">10.13063/2327-9214.1231</pub-id><pub-id pub-id-type="medline">27347492</pub-id></nlm-citation></ref><ref id="ref69"><label>69</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Hjaltelin</surname><given-names>JX</given-names> </name><name name-style="western"><surname>Novitski</surname><given-names>SI</given-names> </name><name name-style="western"><surname>J&#x00F8;rgensen</surname><given-names>IF</given-names> </name><etal/></person-group><article-title>Pancreatic cancer symptom trajectories from Danish registry data and free text in electronic health records</article-title><source>Elife</source><year>2023</year><month>11</month><day>21</day><volume>12</volume><fpage>e84919</fpage><pub-id pub-id-type="doi">10.7554/eLife.84919</pub-id><pub-id pub-id-type="medline">37988407</pub-id></nlm-citation></ref><ref id="ref70"><label>70</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>L</given-names> </name><name name-style="western"><surname>Ruan</surname><given-names>X</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>P</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>H</given-names> </name></person-group><article-title>Comparison of three information sources for smoking information in electronic health records</article-title><source>Cancer Inform</source><year>2016</year><volume>15</volume><fpage>237</fpage><lpage>242</lpage><pub-id pub-id-type="doi">10.4137/CIN.S40604</pub-id><pub-id pub-id-type="medline">27980387</pub-id></nlm-citation></ref><ref id="ref71"><label>71</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Prado</surname><given-names>MG</given-names> </name><name name-style="western"><surname>Kessler</surname><given-names>LG</given-names> </name><name name-style="western"><surname>Au</surname><given-names>MA</given-names> </name><etal/></person-group><article-title>Symptoms and signs of lung cancer prior to diagnosis: case-control study using electronic health records 
from ambulatory care within a large US-based tertiary care centre</article-title><source>BMJ Open</source><year>2023</year><month>04</month><day>20</day><volume>13</volume><issue>4</issue><fpage>e068832</fpage><pub-id pub-id-type="doi">10.1136/bmjopen-2022-068832</pub-id><pub-id pub-id-type="medline">37080616</pub-id></nlm-citation></ref><ref id="ref72"><label>72</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shi</surname><given-names>J</given-names> </name><name name-style="western"><surname>Morgan</surname><given-names>KL</given-names> </name><name name-style="western"><surname>Bradshaw</surname><given-names>RL</given-names> </name><etal/></person-group><article-title>Identifying patients who meet criteria for genetic testing of hereditary cancers based on structured and unstructured family health history data in the electronic health record: natural language processing approach</article-title><source>JMIR Med Inform</source><year>2022</year><month>08</month><day>11</day><volume>10</volume><issue>8</issue><fpage>e37842</fpage><pub-id pub-id-type="doi">10.2196/37842</pub-id><pub-id pub-id-type="medline">35969459</pub-id></nlm-citation></ref><ref id="ref73"><label>73</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bozkurt</surname><given-names>S</given-names> </name><name name-style="western"><surname>Magnani</surname><given-names>CJ</given-names> </name><name name-style="western"><surname>Seneviratne</surname><given-names>MG</given-names> </name><name name-style="western"><surname>Brooks</surname><given-names>JD</given-names> </name><name name-style="western"><surname>Hernandez-Boussard</surname><given-names>T</given-names> </name></person-group><article-title>Expanding the secondary use of prostate cancer real world data: automated classifiers for clinical and pathological stage</article-title><source>Front Digit 
Health</source><year>2022</year><volume>4</volume><fpage>793316</fpage><pub-id pub-id-type="doi">10.3389/fdgth.2022.793316</pub-id><pub-id pub-id-type="medline">35721793</pub-id></nlm-citation></ref><ref id="ref74"><label>74</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Breitenstein</surname><given-names>MK</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>H</given-names> </name><name name-style="western"><surname>Maxwell</surname><given-names>KN</given-names> </name><name name-style="western"><surname>Pathak</surname><given-names>J</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>R</given-names> </name></person-group><article-title>Electronic health record phenotypes for precision medicine: perspectives and caveats from treatment of breast cancer at a single institution</article-title><source>Clin Transl Sci</source><year>2018</year><month>01</month><volume>11</volume><issue>1</issue><fpage>85</fpage><lpage>92</lpage><pub-id pub-id-type="doi">10.1111/cts.12514</pub-id><pub-id pub-id-type="medline">29084368</pub-id></nlm-citation></ref><ref id="ref75"><label>75</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>S</given-names> </name><name name-style="western"><surname>McCoy</surname><given-names>AB</given-names> </name><name name-style="western"><surname>Aldrich</surname><given-names>MC</given-names> </name><etal/></person-group><article-title>Leveraging natural language processing to identify eligible lung cancer screening patients with the electronic health record</article-title><source>Int J Med Inform</source><year>2023</year><month>09</month><volume>177</volume><fpage>105136</fpage><pub-id pub-id-type="doi">10.1016/j.ijmedinf.2023.105136</pub-id><pub-id pub-id-type="medline">37392712</pub-id></nlm-citation></ref><ref 
id="ref76"><label>76</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Schiappa</surname><given-names>R</given-names> </name><name name-style="western"><surname>Contu</surname><given-names>S</given-names> </name><name name-style="western"><surname>Culie</surname><given-names>D</given-names> </name><etal/></person-group><article-title>Validation of RUBY for breast cancer knowledge extraction from a large French electronic medical record system</article-title><source>JCO Clin Cancer Inform</source><year>2023</year><month>05</month><volume>7</volume><fpage>e2200130</fpage><pub-id pub-id-type="doi">10.1200/CCI.22.00130</pub-id><pub-id pub-id-type="medline">37235837</pub-id></nlm-citation></ref><ref id="ref77"><label>77</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Beck</surname><given-names>JT</given-names> </name><name name-style="western"><surname>Rammage</surname><given-names>M</given-names> </name><name name-style="western"><surname>Jackson</surname><given-names>GP</given-names> </name><etal/></person-group><article-title>Artificial intelligence tool for optimizing eligibility screening for clinical trials in a large community cancer center</article-title><source>JCO Clin Cancer Inform</source><year>2020</year><month>01</month><volume>4</volume><fpage>50</fpage><lpage>59</lpage><pub-id pub-id-type="doi">10.1200/CCI.19.00079</pub-id><pub-id pub-id-type="medline">31977254</pub-id></nlm-citation></ref><ref id="ref78"><label>78</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lindvall</surname><given-names>C</given-names> </name><name name-style="western"><surname>Lilley</surname><given-names>EJ</given-names> </name><name name-style="western"><surname>Zupanc</surname><given-names>SN</given-names> </name><etal/></person-group><article-title>Natural 
language processing to assess end-of-life quality indicators in cancer patients receiving palliative surgery</article-title><source>J Palliat Med</source><year>2019</year><month>02</month><volume>22</volume><issue>2</issue><fpage>183</fpage><lpage>187</lpage><pub-id pub-id-type="doi">10.1089/jpm.2018.0326</pub-id><pub-id pub-id-type="medline">30328764</pub-id></nlm-citation></ref><ref id="ref79"><label>79</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gauthier</surname><given-names>MP</given-names> </name><name name-style="western"><surname>Law</surname><given-names>JH</given-names> </name><name name-style="western"><surname>Le</surname><given-names>LW</given-names> </name><etal/></person-group><article-title>Automating access to real-world evidence</article-title><source>JTO Clin Res Rep</source><year>2022</year><month>06</month><volume>3</volume><issue>6</issue><fpage>100340</fpage><pub-id pub-id-type="doi">10.1016/j.jtocrr.2022.100340</pub-id><pub-id pub-id-type="medline">35719866</pub-id></nlm-citation></ref><ref id="ref80"><label>80</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lin</surname><given-names>E</given-names> </name><name name-style="western"><surname>Zwolinski</surname><given-names>R</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>JTY</given-names> </name><etal/></person-group><article-title>Machine learning-based natural language processing to extract PD-L1 expression levels from clinical notes</article-title><source>Health Informatics J</source><year>2023</year><volume>29</volume><issue>3</issue><fpage>14604582231198021</fpage><pub-id pub-id-type="doi">10.1177/14604582231198021</pub-id><pub-id pub-id-type="medline">37635280</pub-id></nlm-citation></ref><ref id="ref81"><label>81</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Lindvall</surname><given-names>C</given-names> </name><name name-style="western"><surname>Deng</surname><given-names>CY</given-names> </name><name name-style="western"><surname>Moseley</surname><given-names>E</given-names> </name><etal/></person-group><article-title>Natural language processing to identify advance care planning documentation in a multisite pragmatic clinical trial</article-title><source>J Pain Symptom Manage</source><year>2022</year><month>01</month><volume>63</volume><issue>1</issue><fpage>e29</fpage><lpage>e36</lpage><pub-id pub-id-type="doi">10.1016/j.jpainsymman.2021.06.025</pub-id><pub-id pub-id-type="medline">34271146</pub-id></nlm-citation></ref><ref id="ref82"><label>82</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>K</given-names> </name><name name-style="western"><surname>Cui</surname><given-names>H</given-names> </name><name name-style="western"><surname>Zhu</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Evaluation of an artificial intelligence-based clinical trial matching system in Chinese patients with hepatocellular carcinoma: a retrospective study</article-title><source>BMC Cancer</source><year>2024</year><volume>24</volume><issue>1</issue><fpage>1</fpage><lpage>7</lpage><pub-id pub-id-type="doi">10.1186/s12885-024-11959-7</pub-id></nlm-citation></ref><ref id="ref83"><label>83</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lin</surname><given-names>FPY</given-names> </name><name name-style="western"><surname>Salih</surname><given-names>OSM</given-names> </name><name name-style="western"><surname>Scott</surname><given-names>N</given-names> </name><name name-style="western"><surname>Jameson</surname><given-names>MB</given-names> </name><name 
name-style="western"><surname>Epstein</surname><given-names>RJ</given-names> </name></person-group><article-title>Development and validation of a machine learning approach leveraging real-world clinical narratives as a predictor of survival in advanced cancer</article-title><source>JCO Clin Cancer Inform</source><year>2022</year><month>10</month><volume>6</volume><fpage>e2200064</fpage><pub-id pub-id-type="doi">10.1200/CCI.22.00064</pub-id><pub-id pub-id-type="medline">36265112</pub-id></nlm-citation></ref><ref id="ref84"><label>84</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gensheimer</surname><given-names>MF</given-names> </name><name name-style="western"><surname>Aggarwal</surname><given-names>S</given-names> </name><name name-style="western"><surname>Benson</surname><given-names>KRK</given-names> </name><etal/></person-group><article-title>Automated model versus treating physician for predicting survival time of patients with metastatic cancer</article-title><source>J Am Med Inform Assoc</source><year>2021</year><month>06</month><day>12</day><volume>28</volume><issue>6</issue><fpage>1108</fpage><lpage>1116</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocaa290</pub-id></nlm-citation></ref><ref id="ref85"><label>85</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Moseley</surname><given-names>ET</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>JT</given-names> </name><name name-style="western"><surname>Welt</surname><given-names>J</given-names> </name><etal/></person-group><article-title>A corpus for detecting high-context medical conditions in intensive care patient notes focusing on frequently readmitted patients</article-title><comment>Preprint posted online on Mar 6, 2020</comment><pub-id pub-id-type="doi">10.48550/arXiv.2003.03044</pub-id></nlm-citation></ref><ref id="ref86"><label>86</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Xu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Li</surname><given-names>N</given-names> </name><name name-style="western"><surname>Lu</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Development and validation of method for defining conditions using Chinese electronic medical record</article-title><source>BMC Med Inform Decis Mak</source><year>2016</year><month>08</month><day>20</day><volume>16</volume><fpage>110</fpage><pub-id pub-id-type="doi">10.1186/s12911-016-0348-6</pub-id><pub-id pub-id-type="medline">27542973</pub-id></nlm-citation></ref><ref id="ref87"><label>87</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Poort</surname><given-names>H</given-names> </name><name name-style="western"><surname>Zupanc</surname><given-names>SN</given-names> </name><name name-style="western"><surname>Leiter</surname><given-names>RE</given-names> </name><name name-style="western"><surname>Wright</surname><given-names>AA</given-names> </name><name name-style="western"><surname>Lindvall</surname><given-names>C</given-names> </name></person-group><article-title>Documentation of palliative and end-of-life care process measures among young adults who died of cancer: a natural language processing approach</article-title><source>J Adolesc Young Adult Oncol</source><year>2020</year><month>02</month><volume>9</volume><issue>1</issue><fpage>100</fpage><lpage>104</lpage><pub-id pub-id-type="doi">10.1089/jayao.2019.0040</pub-id><pub-id pub-id-type="medline">31411524</pub-id></nlm-citation></ref><ref id="ref88"><label>88</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ernecoff</surname><given-names>NC</given-names> </name><name 
name-style="western"><surname>Wessell</surname><given-names>KL</given-names> </name><name name-style="western"><surname>Hanson</surname><given-names>LC</given-names> </name><etal/></person-group><article-title>Electronic health record phenotypes for identifying patients with late-stage disease: a method for research and clinical application</article-title><source>J Gen Intern Med</source><year>2019</year><month>12</month><volume>34</volume><issue>12</issue><fpage>2818</fpage><lpage>2823</lpage><pub-id pub-id-type="doi">10.1007/s11606-019-05219-9</pub-id><pub-id pub-id-type="medline">31396813</pub-id></nlm-citation></ref><ref id="ref89"><label>89</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Warner</surname><given-names>JL</given-names> </name><name name-style="western"><surname>Levy</surname><given-names>MA</given-names> </name><name name-style="western"><surname>Neuss</surname><given-names>MN</given-names> </name></person-group><article-title>ReCAP: feasibility and accuracy of extracting cancer stage information from narrative electronic health record data</article-title><source>J Oncol Pract</source><year>2016</year><month>02</month><volume>12</volume><issue>2</issue><fpage>157</fpage><lpage>158</lpage><pub-id pub-id-type="doi">10.1200/JOP.2015.004622</pub-id><pub-id pub-id-type="medline">26306621</pub-id></nlm-citation></ref><ref id="ref90"><label>90</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>L</given-names> </name><name name-style="western"><surname>Song</surname><given-names>L</given-names> </name><name 
name-style="western"><surname>Shao</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Li</surname><given-names>D</given-names> </name><name name-style="western"><surname>Ding</surname><given-names>K</given-names> </name></person-group><article-title>Using natural language processing to extract clinically useful information from Chinese electronic medical records</article-title><source>Int J Med Inform</source><year>2019</year><month>04</month><volume>124</volume><fpage>6</fpage><lpage>12</lpage><pub-id pub-id-type="doi">10.1016/j.ijmedinf.2019.01.004</pub-id><pub-id pub-id-type="medline">30784428</pub-id></nlm-citation></ref><ref id="ref91"><label>91</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kondratieff</surname><given-names>KE</given-names> </name><name name-style="western"><surname>Brown</surname><given-names>JT</given-names> </name><name name-style="western"><surname>Barron</surname><given-names>M</given-names> </name><name name-style="western"><surname>Warner</surname><given-names>JL</given-names> </name><name name-style="western"><surname>Yin</surname><given-names>Z</given-names> </name></person-group><article-title>Mining medication use patterns from clinical notes for breast cancer patients through a two-stage topic modeling approach</article-title><source>AMIA Jt Summits Transl Sci Proc</source><year>2022</year><volume>2022</volume><fpage>303</fpage><lpage>312</lpage><pub-id pub-id-type="medline">35854740</pub-id></nlm-citation></ref><ref id="ref92"><label>92</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hong</surname><given-names>JC</given-names> </name><name name-style="western"><surname>Fairchild</surname><given-names>AT</given-names> </name><name name-style="western"><surname>Tanksley</surname><given-names>JP</given-names> </name><name 
name-style="western"><surname>Palta</surname><given-names>M</given-names> </name><name name-style="western"><surname>Tenenbaum</surname><given-names>JD</given-names> </name></person-group><article-title>Natural language processing for abstraction of cancer treatment toxicities: accuracy versus human experts</article-title><source>JAMIA Open</source><year>2021</year><month>02</month><day>15</day><volume>3</volume><issue>4</issue><fpage>513</fpage><lpage>517</lpage><pub-id pub-id-type="doi">10.1093/jamiaopen/ooaa064</pub-id></nlm-citation></ref><ref id="ref93"><label>93</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gregg</surname><given-names>JR</given-names> </name><name name-style="western"><surname>Lang</surname><given-names>M</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>LL</given-names> </name><etal/></person-group><article-title>Automating the determination of prostate cancer risk strata from electronic medical records</article-title><source>JCO Clin Cancer Inform</source><year>2017</year><volume>1</volume><issue>1</issue><fpage>1</fpage><lpage>8</lpage><pub-id pub-id-type="doi">10.1200/CCI.16.00045</pub-id><pub-id pub-id-type="medline">29541700</pub-id></nlm-citation></ref><ref id="ref94"><label>94</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>K</given-names> </name><name name-style="western"><surname>Banerjee</surname><given-names>I</given-names> </name><name name-style="western"><surname>Magnani</surname><given-names>CJ</given-names> </name><name name-style="western"><surname>Blayney</surname><given-names>DW</given-names> </name><name name-style="western"><surname>Brooks</surname><given-names>JD</given-names> </name><name name-style="western"><surname>Hernandez-Boussard</surname><given-names>T</given-names> 
</name></person-group><article-title>Clinical documentation to predict factors associated with urinary incontinence following prostatectomy for prostate cancer</article-title><source>Res Rep Urol</source><year>2020</year><volume>12</volume><fpage>7</fpage><lpage>14</lpage><pub-id pub-id-type="doi">10.2147/RRU.S234178</pub-id><pub-id pub-id-type="medline">32158720</pub-id></nlm-citation></ref><ref id="ref95"><label>95</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bozkurt</surname><given-names>S</given-names> </name><name name-style="western"><surname>Kan</surname><given-names>KM</given-names> </name><name name-style="western"><surname>Ferrari</surname><given-names>MK</given-names> </name><etal/></person-group><article-title>Is it possible to automatically assess pretreatment digital rectal examination documentation using natural language processing? A single-centre retrospective study</article-title><source>BMJ Open</source><year>2019</year><month>07</month><day>18</day><volume>9</volume><issue>7</issue><fpage>e027182</fpage><pub-id pub-id-type="doi">10.1136/bmjopen-2018-027182</pub-id><pub-id pub-id-type="medline">31324681</pub-id></nlm-citation></ref><ref id="ref96"><label>96</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Laios</surname><given-names>A</given-names> </name><name name-style="western"><surname>Kalampokis</surname><given-names>E</given-names> </name><name name-style="western"><surname>Mamalis</surname><given-names>ME</given-names> </name><etal/></person-group><article-title>RoBERTa-assisted outcome prediction in ovarian cancer cytoreductive surgery using operative notes</article-title><source>Cancer Control</source><year>2023</year><volume>30</volume><fpage>10732748231209892</fpage><pub-id pub-id-type="doi">10.1177/10732748231209892</pub-id><pub-id 
pub-id-type="medline">37915208</pub-id></nlm-citation></ref><ref id="ref97"><label>97</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Joffe</surname><given-names>E</given-names> </name><name name-style="western"><surname>Pettigrew</surname><given-names>EJ</given-names> </name><name name-style="western"><surname>Herskovic</surname><given-names>JR</given-names> </name><name name-style="western"><surname>Bearden</surname><given-names>CF</given-names> </name><name name-style="western"><surname>Bernstam</surname><given-names>EV</given-names> </name></person-group><article-title>Expert guided natural language processing using one-class classification</article-title><source>J Am Med Inform Assoc</source><year>2015</year><month>09</month><volume>22</volume><issue>5</issue><fpage>962</fpage><lpage>966</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocv010</pub-id><pub-id pub-id-type="medline">26063744</pub-id></nlm-citation></ref><ref id="ref98"><label>98</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Coquet</surname><given-names>J</given-names> </name><name name-style="western"><surname>Bozkurt</surname><given-names>S</given-names> </name><name name-style="western"><surname>Kan</surname><given-names>KM</given-names> </name><etal/></person-group><article-title>Comparison of orthogonal NLP methods for clinical phenotyping and assessment of bone scan utilization among prostate cancer patients</article-title><source>J Biomed Inform</source><year>2019</year><month>06</month><volume>94</volume><fpage>103184</fpage><pub-id pub-id-type="doi">10.1016/j.jbi.2019.103184</pub-id><pub-id pub-id-type="medline">31014980</pub-id></nlm-citation></ref><ref id="ref99"><label>99</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bozkurt</surname><given-names>S</given-names> 
</name><name name-style="western"><surname>Park</surname><given-names>JI</given-names> </name><name name-style="western"><surname>Kan</surname><given-names>KM</given-names> </name><etal/></person-group><article-title>An automated feature engineering for digital rectal examination documentation using natural language processing</article-title><source>AMIA Annu Symp Proc</source><year>2018</year><volume>2018</volume><fpage>288</fpage><lpage>294</lpage><pub-id pub-id-type="medline">30815067</pub-id></nlm-citation></ref><ref id="ref100"><label>100</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sanyal</surname><given-names>J</given-names> </name><name name-style="western"><surname>Tariq</surname><given-names>A</given-names> </name><name name-style="western"><surname>Kurian</surname><given-names>AW</given-names> </name><name name-style="western"><surname>Rubin</surname><given-names>D</given-names> </name><name name-style="western"><surname>Banerjee</surname><given-names>I</given-names> </name></person-group><article-title>Weakly supervised temporal model for prediction of breast cancer distant recurrence</article-title><source>Sci Rep</source><year>2021</year><month>05</month><day>4</day><volume>11</volume><issue>1</issue><fpage>9461</fpage><pub-id pub-id-type="doi">10.1038/s41598-021-89033-6</pub-id><pub-id pub-id-type="medline">33947927</pub-id></nlm-citation></ref><ref id="ref101"><label>101</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kehl</surname><given-names>KL</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>W</given-names> </name><name name-style="western"><surname>Gusev</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Artificial intelligence-aided clinical annotation of a large multi-cancer genomic
dataset</article-title><source>Nat Commun</source><year>2021</year><month>12</month><day>15</day><volume>12</volume><issue>1</issue><fpage>7304</fpage><pub-id pub-id-type="doi">10.1038/s41467-021-27358-6</pub-id><pub-id pub-id-type="medline">34911934</pub-id></nlm-citation></ref><ref id="ref102"><label>102</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>S</given-names> </name><name name-style="western"><surname>Guevara</surname><given-names>M</given-names> </name><name name-style="western"><surname>Ramirez</surname><given-names>N</given-names> </name><etal/></person-group><article-title>Natural language processing to automatically extract the presence and severity of esophagitis in notes of patients undergoing radiotherapy</article-title><source>JCO Clin Cancer Inform</source><year>2023</year><month>07</month><volume>7</volume><fpage>e2300048</fpage><pub-id pub-id-type="doi">10.1200/CCI.23.00048</pub-id><pub-id pub-id-type="medline">37506330</pub-id></nlm-citation></ref><ref id="ref103"><label>103</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lindvall</surname><given-names>C</given-names> </name><name name-style="western"><surname>Deng</surname><given-names>CY</given-names> </name><name name-style="western"><surname>Agaronnik</surname><given-names>ND</given-names> </name><etal/></person-group><article-title>Deep learning for cancer symptoms monitoring on the basis of electronic health record unstructured clinical notes</article-title><source>JCO Clin Cancer Inform</source><year>2022</year><month>06</month><volume>6</volume><fpage>e2100136</fpage><pub-id pub-id-type="doi">10.1200/CCI.21.00136</pub-id><pub-id pub-id-type="medline">35714301</pub-id></nlm-citation></ref><ref id="ref104"><label>104</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Yim</surname><given-names>WW</given-names> </name><name name-style="western"><surname>Kwan</surname><given-names>SW</given-names> </name><name name-style="western"><surname>Johnson</surname><given-names>G</given-names> </name><name name-style="western"><surname>Yetisgen</surname><given-names>M</given-names> </name></person-group><article-title>Classification of hepatocellular carcinoma stages from free-text clinical and radiology reports</article-title><source>AMIA Annu Symp Proc</source><year>2017</year><volume>2017</volume><fpage>1858</fpage><lpage>1867</lpage><pub-id pub-id-type="medline">29854257</pub-id></nlm-citation></ref><ref id="ref105"><label>105</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Derton</surname><given-names>A</given-names> </name><name name-style="western"><surname>Guevara</surname><given-names>M</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Natural language processing methods to empirically explore social contexts and needs in cancer patient notes</article-title><source>JCO Clin Cancer Inform</source><year>2023</year><month>05</month><volume>7</volume><fpage>e2200196</fpage><pub-id pub-id-type="doi">10.1200/CCI.22.00196</pub-id><pub-id pub-id-type="medline">37235847</pub-id></nlm-citation></ref><ref id="ref106"><label>106</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Khor</surname><given-names>RC</given-names> </name><name name-style="western"><surname>Nguyen</surname><given-names>A</given-names> </name><name name-style="western"><surname>O&#x2019;Dwyer</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Extracting tumour prognostic factors from a diverse electronic record dataset in genito-urinary
oncology</article-title><source>Int J Med Inform</source><year>2019</year><month>01</month><volume>121</volume><fpage>53</fpage><lpage>57</lpage><pub-id pub-id-type="doi">10.1016/j.ijmedinf.2018.10.008</pub-id><pub-id pub-id-type="medline">30545489</pub-id></nlm-citation></ref><ref id="ref107"><label>107</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Delorme</surname><given-names>J</given-names> </name><name name-style="western"><surname>Charvet</surname><given-names>V</given-names> </name><name name-style="western"><surname>Wartelle</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Natural language processing for patient selection in Phase I or II oncology clinical trials</article-title><source>JCO Clin Cancer Inform</source><year>2021</year><month>06</month><volume>5</volume><fpage>709</fpage><lpage>718</lpage><pub-id pub-id-type="doi">10.1200/CCI.21.00003</pub-id><pub-id pub-id-type="medline">34197179</pub-id></nlm-citation></ref><ref id="ref108"><label>108</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kehl</surname><given-names>KL</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>W</given-names> </name><name name-style="western"><surname>Lepisto</surname><given-names>E</given-names> </name><etal/></person-group><article-title>Natural language processing to ascertain cancer outcomes from medical oncologist notes</article-title><source>JCO Clin Cancer Inform</source><year>2020</year><month>08</month><volume>4</volume><fpage>680</fpage><lpage>690</lpage><pub-id pub-id-type="doi">10.1200/CCI.20.00020</pub-id><pub-id pub-id-type="medline">32755459</pub-id></nlm-citation></ref><ref id="ref109"><label>109</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>DiMartino</surname><given-names>L</given-names> </name><name name-style="western"><surname>Miano</surname><given-names>T</given-names> </name><name name-style="western"><surname>Wessell</surname><given-names>K</given-names> </name><name name-style="western"><surname>Bohac</surname><given-names>B</given-names> </name><name name-style="western"><surname>Hanson</surname><given-names>LC</given-names> </name></person-group><article-title>Identification of uncontrolled symptoms in cancer patients using natural language processing</article-title><source>J Pain Symptom Manage</source><year>2022</year><month>04</month><volume>63</volume><issue>4</issue><fpage>610</fpage><lpage>617</lpage><pub-id pub-id-type="doi">10.1016/j.jpainsymman.2021.10.014</pub-id><pub-id pub-id-type="medline">34743011</pub-id></nlm-citation></ref><ref id="ref110"><label>110</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zeng</surname><given-names>J</given-names> </name><name name-style="western"><surname>Banerjee</surname><given-names>I</given-names> </name><name name-style="western"><surname>Henry</surname><given-names>AS</given-names> </name><etal/></person-group><article-title>Natural language processing to identify cancer treatments with electronic medical records</article-title><source>JCO Clin Cancer Inform</source><year>2021</year><month>04</month><volume>5</volume><fpage>379</fpage><lpage>393</lpage><pub-id pub-id-type="doi">10.1200/CCI.20.00173</pub-id><pub-id pub-id-type="medline">33822653</pub-id></nlm-citation></ref><ref id="ref111"><label>111</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bozkurt</surname><given-names>S</given-names> </name><name name-style="western"><surname>Paul</surname><given-names>R</given-names> </name><name name-style="western"><surname>Coquet</surname><given-names>J</given-names> 
</name><etal/></person-group><article-title>Phenotyping severity of patient-centered outcomes using clinical notes: a prostate cancer use case</article-title><source>Learn Health Syst</source><year>2020</year><month>10</month><volume>4</volume><issue>4</issue><fpage>e10237</fpage><pub-id pub-id-type="doi">10.1002/lrh2.10237</pub-id><pub-id pub-id-type="medline">33083539</pub-id></nlm-citation></ref><ref id="ref112"><label>112</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Meystre</surname><given-names>SM</given-names> </name><name name-style="western"><surname>Heider</surname><given-names>PM</given-names> </name><name name-style="western"><surname>Cates</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Piloting an automated clinical trial eligibility surveillance and provider alert system based on artificial intelligence and standard data models</article-title><source>BMC Med Res Methodol</source><year>2023</year><month>04</month><day>11</day><volume>23</volume><issue>1</issue><fpage>88</fpage><pub-id pub-id-type="doi">10.1186/s12874-023-01916-6</pub-id><pub-id pub-id-type="medline">37041475</pub-id></nlm-citation></ref><ref id="ref113"><label>113</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Araki</surname><given-names>K</given-names> </name><name name-style="western"><surname>Matsumoto</surname><given-names>N</given-names> </name><name name-style="western"><surname>Togo</surname><given-names>K</given-names> </name><etal/></person-group><article-title>Real-world treatment response in Japanese patients with cancer using unstructured data from electronic health records</article-title><source>Health Technol</source><year>2023</year><month>03</month><volume>13</volume><issue>2</issue><fpage>253</fpage><lpage>262</lpage><pub-id 
pub-id-type="doi">10.1007/s12553-023-00739-1</pub-id></nlm-citation></ref><ref id="ref114"><label>114</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Guan</surname><given-names>M</given-names> </name><name name-style="western"><surname>Cho</surname><given-names>S</given-names> </name><name name-style="western"><surname>Petro</surname><given-names>R</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>W</given-names> </name><name name-style="western"><surname>Pasche</surname><given-names>B</given-names> </name><name name-style="western"><surname>Topaloglu</surname><given-names>U</given-names> </name></person-group><article-title>Natural language processing and recurrent network models for identifying genomic mutation-associated cancer treatment change from patient progress notes</article-title><source>JAMIA Open</source><year>2019</year><month>04</month><volume>2</volume><issue>1</issue><fpage>139</fpage><lpage>149</lpage><pub-id pub-id-type="doi">10.1093/jamiaopen/ooy061</pub-id><pub-id pub-id-type="medline">30944913</pub-id></nlm-citation></ref><ref id="ref115"><label>115</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>F</given-names> </name><name name-style="western"><surname>Yu</surname><given-names>H</given-names> </name></person-group><article-title>An investigation of single-domain and multidomain medication and adverse drug event relation extraction from electronic health record notes using advanced deep learning models</article-title><source>J Am Med Inform Assoc</source><year>2019</year><month>07</month><day>1</day><volume>26</volume><issue>7</issue><fpage>646</fpage><lpage>654</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocz018</pub-id></nlm-citation></ref><ref id="ref116"><label>116</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Dai</surname><given-names>HJ</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>FD</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>CW</given-names> </name><name name-style="western"><surname>Su</surname><given-names>CH</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>CS</given-names> </name><name name-style="western"><surname>Jonnagaddala</surname><given-names>J</given-names> </name></person-group><article-title>Cohort selection for clinical trials using multiple instance learning</article-title><source>J Biomed Inform</source><year>2020</year><month>07</month><volume>107</volume><fpage>103438</fpage><pub-id pub-id-type="doi">10.1016/j.jbi.2020.103438</pub-id><pub-id pub-id-type="medline">32360937</pub-id></nlm-citation></ref><ref id="ref117"><label>117</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Forsyth</surname><given-names>AW</given-names> </name><name name-style="western"><surname>Barzilay</surname><given-names>R</given-names> </name><name name-style="western"><surname>Hughes</surname><given-names>KS</given-names> </name><etal/></person-group><article-title>Machine learning methods to extract documentation of breast cancer symptoms from electronic health records</article-title><source>J Pain Symptom Manage</source><year>2018</year><month>06</month><volume>55</volume><issue>6</issue><fpage>1492</fpage><lpage>1499</lpage><pub-id pub-id-type="doi">10.1016/j.jpainsymman.2018.02.016</pub-id><pub-id pub-id-type="medline">29496537</pub-id></nlm-citation></ref><ref id="ref118"><label>118</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yuan</surname><given-names>Q</given-names> </name><name 
name-style="western"><surname>Cai</surname><given-names>T</given-names> </name><name name-style="western"><surname>Hong</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Performance of a machine learning algorithm using electronic health record data to identify and estimate survival in a longitudinal cohort of patients with lung cancer</article-title><source>JAMA Netw Open</source><year>2021</year><month>07</month><day>1</day><volume>4</volume><issue>7</issue><fpage>e2114723</fpage><pub-id pub-id-type="doi">10.1001/jamanetworkopen.2021.14723</pub-id><pub-id pub-id-type="medline">34232304</pub-id></nlm-citation></ref><ref id="ref119"><label>119</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Banerjee</surname><given-names>I</given-names> </name><name name-style="western"><surname>Li</surname><given-names>K</given-names> </name><name name-style="western"><surname>Seneviratne</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Weakly supervised natural language processing for assessing patient-centered outcome following prostate cancer treatment</article-title><source>JAMIA Open</source><year>2019</year><month>04</month><volume>2</volume><issue>1</issue><fpage>150</fpage><lpage>159</lpage><pub-id pub-id-type="doi">10.1093/jamiaopen/ooy057</pub-id><pub-id pub-id-type="medline">31032481</pub-id></nlm-citation></ref><ref id="ref120"><label>120</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Agaronnik</surname><given-names>ND</given-names> </name><name name-style="western"><surname>Lindvall</surname><given-names>C</given-names> </name><name name-style="western"><surname>El-Jawahri</surname><given-names>A</given-names> </name><name name-style="western"><surname>He</surname><given-names>W</given-names> </name><name 
name-style="western"><surname>Iezzoni</surname><given-names>LI</given-names> </name></person-group><article-title>Challenges of developing a natural language processing method with electronic health records to identify persons with chronic mobility disability</article-title><source>Arch Phys Med Rehabil</source><year>2020</year><month>10</month><volume>101</volume><issue>10</issue><fpage>1739</fpage><lpage>1746</lpage><pub-id pub-id-type="doi">10.1016/j.apmr.2020.04.024</pub-id><pub-id pub-id-type="medline">32446905</pub-id></nlm-citation></ref><ref id="ref121"><label>121</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Leis</surname><given-names>A</given-names> </name><name name-style="western"><surname>Casadevall</surname><given-names>D</given-names> </name><name name-style="western"><surname>Albanell</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Exploring the association of cancer and depression in electronic health records: combining encoded diagnosis and mining free-text clinical notes</article-title><source>JMIR Cancer</source><year>2022</year><month>07</month><day>11</day><volume>8</volume><issue>3</issue><fpage>e39003</fpage><pub-id pub-id-type="doi">10.2196/39003</pub-id><pub-id pub-id-type="medline">35816382</pub-id></nlm-citation></ref><ref id="ref122"><label>122</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lin</surname><given-names>FPY</given-names> </name><name name-style="western"><surname>Pokorny</surname><given-names>A</given-names> </name><name name-style="western"><surname>Teng</surname><given-names>C</given-names> </name><name name-style="western"><surname>Epstein</surname><given-names>RJ</given-names> </name></person-group><article-title>TEPAPA: a novel in silico feature learning pipeline for mining prognostic and associative factors from text-based electronic 
medical records</article-title><source>Sci Rep</source><year>2017</year><month>07</month><day>31</day><volume>7</volume><issue>1</issue><fpage>6918</fpage><pub-id pub-id-type="doi">10.1038/s41598-017-07111-0</pub-id><pub-id pub-id-type="medline">28761061</pub-id></nlm-citation></ref><ref id="ref123"><label>123</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Redd</surname><given-names>DF</given-names> </name><name name-style="western"><surname>Shao</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Zeng-Treitler</surname><given-names>Q</given-names> </name><etal/></person-group><article-title>Identification of colorectal cancer using structured and free text clinical data</article-title><source>Health Informatics J</source><year>2022</year><volume>28</volume><issue>4</issue><fpage>14604582221134406</fpage><pub-id pub-id-type="doi">10.1177/14604582221134406</pub-id><pub-id pub-id-type="medline">36300566</pub-id></nlm-citation></ref><ref id="ref124"><label>124</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>F</given-names> </name><name name-style="western"><surname>Pradhan</surname><given-names>R</given-names> </name><name name-style="western"><surname>Druhl</surname><given-names>E</given-names> </name><etal/></person-group><article-title>Learning to detect and understand drug discontinuation events from clinical narratives</article-title><source>J Am Med Inform Assoc</source><year>2019</year><month>10</month><day>1</day><volume>26</volume><issue>10</issue><fpage>943</fpage><lpage>951</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocz048</pub-id><pub-id pub-id-type="medline">31034028</pub-id></nlm-citation></ref><ref id="ref125"><label>125</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Liu</surname><given-names>K</given-names> </name><name name-style="western"><surname>Kulkarni</surname><given-names>O</given-names> </name><name name-style="western"><surname>Witteveen-Lane</surname><given-names>M</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>B</given-names> </name><name name-style="western"><surname>Chesla</surname><given-names>D</given-names> </name></person-group><article-title>MetBERT: a generalizable and pre-trained deep learning model for the prediction of metastatic cancer from clinical notes</article-title><source>AMIA Jt Summits Transl Sci Proc</source><year>2022</year><volume>2022</volume><fpage>331</fpage><lpage>338</lpage><pub-id pub-id-type="medline">35854741</pub-id></nlm-citation></ref><ref id="ref126"><label>126</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Koleck</surname><given-names>TA</given-names> </name><name name-style="western"><surname>Topaz</surname><given-names>M</given-names> </name><name name-style="western"><surname>Tatonetti</surname><given-names>NP</given-names> </name><etal/></person-group><article-title>Characterizing shared and distinct symptom clusters in common chronic conditions through natural language processing of nursing notes</article-title><source>Res Nurs Health</source><year>2021</year><month>12</month><volume>44</volume><issue>6</issue><fpage>906</fpage><lpage>919</lpage><pub-id pub-id-type="doi">10.1002/nur.22190</pub-id><pub-id pub-id-type="medline">34637147</pub-id></nlm-citation></ref><ref id="ref127"><label>127</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Ehrentraut</surname><given-names>C</given-names> </name><name name-style="western"><surname>Sundstr&#x00F6;m</surname><given-names>K</given-names> </name><name
name-style="western"><surname>Dalianis</surname><given-names>H</given-names> </name></person-group><article-title>Exploration of known and unknown early symptoms of cervical cancer and development of a symptom spectrum&#x2014;outline of a data and text mining based approach</article-title><conf-name>Proceeding from CAiSE 2015 Industry Track CEUR Workshop Proc</conf-name><conf-date>Jun 8-12, 2015</conf-date><conf-loc>Stockholm, Sweden</conf-loc><fpage>34</fpage><lpage>44</lpage></nlm-citation></ref><ref id="ref128"><label>128</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Lazic</surname><given-names>I</given-names> </name><name name-style="western"><surname>Jakovljevic</surname><given-names>N</given-names> </name><name name-style="western"><surname>Boban</surname><given-names>J</given-names> </name><name name-style="western"><surname>Nosek</surname><given-names>I</given-names> </name><name name-style="western"><surname>Loncar-Turukalo</surname><given-names>T</given-names> </name></person-group><article-title>Information extraction from clinical records: an example for breast cancer</article-title><conf-name>2022 IEEE 21st Mediterranean Electrotechnical Conference (MELECON)</conf-name><conf-date>Jun 14-16, 2022</conf-date><pub-id pub-id-type="doi">10.1109/MELECON53508.2022.9842995</pub-id></nlm-citation></ref><ref id="ref129"><label>129</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Stevens</surname><given-names>M</given-names> </name><name name-style="western"><surname>Kennedy</surname><given-names>G</given-names> </name><name name-style="western"><surname>Churches</surname><given-names>T</given-names> </name></person-group><article-title>Applying and improving a publicly available medication NER pipeline in a clinical cancer EMR</article-title><source>Stud Health Technol
Inform</source><year>2024</year><month>01</month><day>25</day><volume>310</volume><fpage>679</fpage><lpage>684</lpage><pub-id pub-id-type="doi">10.3233/SHTI231051</pub-id><pub-id pub-id-type="medline">38269895</pub-id></nlm-citation></ref><ref id="ref130"><label>130</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Luo</surname><given-names>X</given-names> </name><name name-style="western"><surname>Gandhi</surname><given-names>P</given-names> </name><name name-style="western"><surname>Storey</surname><given-names>S</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Han</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Huang</surname><given-names>K</given-names> </name></person-group><article-title>A computational framework to analyze the associations between symptoms and cancer patient attributes post chemotherapy using EHR data</article-title><source>IEEE J Biomed Health Inform</source><year>2021</year><month>11</month><volume>25</volume><issue>11</issue><fpage>4098</fpage><lpage>4109</lpage><pub-id pub-id-type="doi">10.1109/JBHI.2021.3117238</pub-id><pub-id pub-id-type="medline">34613922</pub-id></nlm-citation></ref><ref id="ref131"><label>131</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Luo</surname><given-names>X</given-names> </name><name name-style="western"><surname>Storey</surname><given-names>S</given-names> </name><name name-style="western"><surname>Gandhi</surname><given-names>P</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Metzger</surname><given-names>M</given-names> </name><name name-style="western"><surname>Huang</surname><given-names>K</given-names> 
</name></person-group><article-title>Analyzing the symptoms in colorectal and breast cancer patients with or without type 2 diabetes using EHR data</article-title><source>Health Informatics J</source><year>2021</year><volume>27</volume><issue>1</issue><fpage>14604582211000785</fpage><pub-id pub-id-type="doi">10.1177/14604582211000785</pub-id><pub-id pub-id-type="medline">33726552</pub-id></nlm-citation></ref><ref id="ref132"><label>132</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Schiappa</surname><given-names>R</given-names> </name><name name-style="western"><surname>Contu</surname><given-names>S</given-names> </name><name name-style="western"><surname>Culie</surname><given-names>D</given-names> </name><etal/></person-group><article-title>RUBY: natural language processing of French electronic medical records for breast cancer research</article-title><source>JCO Clin Cancer Inform</source><year>2022</year><month>07</month><volume>6</volume><issue>6</issue><fpage>e2100199</fpage><pub-id pub-id-type="doi">10.1200/CCI.21.00199</pub-id><pub-id pub-id-type="medline">35960900</pub-id></nlm-citation></ref><ref id="ref133"><label>133</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tan</surname><given-names>HJ</given-names> </name><name name-style="western"><surname>Clarke</surname><given-names>R</given-names> </name><name name-style="western"><surname>Chamie</surname><given-names>K</given-names> </name><etal/></person-group><article-title>Development and validation of an automated method to identify patients undergoing radical cystectomy for bladder cancer using natural language processing</article-title><source>Urol Pract</source><year>2017</year><month>09</month><volume>4</volume><issue>5</issue><fpage>365</fpage><lpage>372</lpage><pub-id pub-id-type="doi">10.1016/j.urpr.2016.09.011</pub-id><pub-id 
pub-id-type="medline">37592698</pub-id></nlm-citation></ref><ref id="ref134"><label>134</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Agaronnik</surname><given-names>N</given-names> </name><name name-style="western"><surname>Lindvall</surname><given-names>C</given-names> </name><name name-style="western"><surname>El-Jawahri</surname><given-names>A</given-names> </name><name name-style="western"><surname>He</surname><given-names>W</given-names> </name><name name-style="western"><surname>Iezzoni</surname><given-names>L</given-names> </name></person-group><article-title>Use of natural language processing to assess frequency of functional status documentation for patients newly diagnosed with colorectal cancer</article-title><source>JAMA Oncol</source><year>2020</year><month>10</month><day>1</day><volume>6</volume><issue>10</issue><fpage>1628</fpage><lpage>1630</lpage><pub-id pub-id-type="doi">10.1001/jamaoncol.2020.2708</pub-id><pub-id pub-id-type="medline">32880603</pub-id></nlm-citation></ref><ref id="ref135"><label>135</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Afzal</surname><given-names>M</given-names> </name><name name-style="western"><surname>Hussain</surname><given-names>M</given-names> </name><name name-style="western"><surname>Ali Khan</surname><given-names>W</given-names> </name><etal/></person-group><article-title>Comprehensible knowledge model creation for cancer treatment decision making</article-title><source>Comput Biol Med</source><year>2017</year><month>03</month><volume>82</volume><fpage>119</fpage><lpage>129</lpage><pub-id pub-id-type="doi">10.1016/j.compbiomed.2017.01.010</pub-id></nlm-citation></ref><ref id="ref136"><label>136</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Loda</surname><given-names>S</given-names> 
</name><name name-style="western"><surname>Krebs</surname><given-names>J</given-names> </name><name name-style="western"><surname>Danhof</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Exploration of artificial intelligence use with ARIES in multiple myeloma research</article-title><source>J Clin Med</source><year>2019</year><month>07</month><day>9</day><volume>8</volume><issue>7</issue><fpage>999</fpage><pub-id pub-id-type="doi">10.3390/jcm8070999</pub-id><pub-id pub-id-type="medline">31324026</pub-id></nlm-citation></ref><ref id="ref137"><label>137</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Tahabi</surname><given-names>FM</given-names> </name><name name-style="western"><surname>Storey</surname><given-names>S</given-names> </name><name name-style="western"><surname>Luo</surname><given-names>X</given-names> </name></person-group><article-title>SymptomGraph: identifying symptom clusters from narrative clinical notes using graph clustering</article-title><conf-name>SAC &#x2019;23: Proceedings of the 38th ACM/SIGAPP Symposium on Applied Computing</conf-name><conf-date>Mar 27-31, 2023</conf-date><pub-id pub-id-type="doi">10.1145/3555776.3577685</pub-id></nlm-citation></ref><ref id="ref138"><label>138</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Percha</surname><given-names>B</given-names> </name><name name-style="western"><surname>Pisapati</surname><given-names>K</given-names> </name><name name-style="western"><surname>Gao</surname><given-names>C</given-names> </name><name name-style="western"><surname>Schmidt</surname><given-names>H</given-names> </name></person-group><article-title>Natural language inference for curation of structured clinical registries from unstructured text</article-title><source>J Am Med Inform 
Assoc</source><year>2021</year><month>12</month><day>28</day><volume>29</volume><issue>1</issue><fpage>97</fpage><lpage>108</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocab243</pub-id><pub-id pub-id-type="medline">34791282</pub-id></nlm-citation></ref><ref id="ref139"><label>139</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Alba</surname><given-names>PR</given-names> </name><name name-style="western"><surname>Gao</surname><given-names>A</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>KM</given-names> </name><etal/></person-group><article-title>Ascertainment of veterans with metastatic prostate cancer in electronic health records: demonstrating the case for natural language processing</article-title><source>JCO Clin Cancer Inform</source><year>2021</year><month>09</month><volume>5</volume><fpage>1005</fpage><lpage>1014</lpage><pub-id pub-id-type="doi">10.1200/CCI.21.00030</pub-id><pub-id pub-id-type="medline">34570630</pub-id></nlm-citation></ref><ref id="ref140"><label>140</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kersloot</surname><given-names>MG</given-names> </name><name name-style="western"><surname>Lau</surname><given-names>F</given-names> </name><name name-style="western"><surname>Abu-Hanna</surname><given-names>A</given-names> </name><name name-style="western"><surname>Arts</surname><given-names>DL</given-names> </name><name name-style="western"><surname>Cornet</surname><given-names>R</given-names> </name></person-group><article-title>Automated SNOMED CT concept and attribute relationship detection through a web-based implementation of cTAKES</article-title><source>J Biomed Semantics</source><year>2019</year><month>09</month><day>18</day><volume>10</volume><issue>1</issue><fpage>14</fpage><pub-id pub-id-type="doi">10.1186/s13326-019-0207-3</pub-id><pub-id 
pub-id-type="medline">31533810</pub-id></nlm-citation></ref><ref id="ref141"><label>141</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ahmad</surname><given-names>PN</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Khan</surname><given-names>K</given-names> </name><name name-style="western"><surname>Jiang</surname><given-names>T</given-names> </name><name name-style="western"><surname>Burhan</surname><given-names>U</given-names> </name></person-group><article-title>BIR: biomedical information retrieval system for cancer treatment in electronic health record using transformers</article-title><source>Sensors (Basel)</source><year>2023</year><month>11</month><day>23</day><volume>23</volume><issue>23</issue><fpage>9355</fpage><pub-id pub-id-type="doi">10.3390/s23239355</pub-id><pub-id pub-id-type="medline">38067736</pub-id></nlm-citation></ref><ref id="ref142"><label>142</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Jamaluddin</surname><given-names>M</given-names> </name><name name-style="western"><surname>Wibawa</surname><given-names>AD</given-names> </name></person-group><article-title>Patient diagnosis classification based on electronic medical record using text mining and support vector machine</article-title><conf-name>2021 International Seminar on Application for Technology of Information and Communication (iSemantic)</conf-name><conf-date>Sep 18-19, 2021</conf-date><pub-id pub-id-type="doi">10.1109/iSemantic52711.2021.9573178</pub-id></nlm-citation></ref><ref id="ref143"><label>143</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shah</surname><given-names>S</given-names> </name><name 
name-style="western"><surname>Luo</surname><given-names>X</given-names> </name><name name-style="western"><surname>Kanakasabai</surname><given-names>S</given-names> </name><name name-style="western"><surname>Tuason</surname><given-names>R</given-names> </name><name name-style="western"><surname>Klopper</surname><given-names>G</given-names> </name></person-group><article-title>Neural networks for mining the associations between diseases and symptoms in clinical notes</article-title><source>Health Inf Sci Syst</source><year>2019</year><month>12</month><volume>7</volume><issue>1</issue><fpage>1</fpage><pub-id pub-id-type="doi">10.1007/s13755-018-0062-0</pub-id><pub-id pub-id-type="medline">30588291</pub-id></nlm-citation></ref><ref id="ref144"><label>144</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Rohanian</surname><given-names>O</given-names> </name><name name-style="western"><surname>Jauncey</surname><given-names>H</given-names> </name><name name-style="western"><surname>Nouriborji</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Using bottleneck adapters to identify cancer in clinical notes under low-resource constraints</article-title><conf-name>The 22nd Workshop on Biomedical Natural Language Processing and BioNLP Shared Tasks</conf-name><conf-date>Jul 13, 2023</conf-date><pub-id pub-id-type="doi">10.18653/v1/2023.bionlp-1.5</pub-id></nlm-citation></ref><ref id="ref145"><label>145</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bouvry</surname><given-names>C</given-names> </name><name name-style="western"><surname>Tvardik</surname><given-names>N</given-names> </name><name name-style="western"><surname>Kergourlay</surname><given-names>I</given-names> </name><etal/></person-group><article-title>The SYNODOS Project: system for the normalization and organization of textual medical 
data for observation in healthcare</article-title><source>IRBM</source><year>2016</year><month>04</month><volume>37</volume><issue>2</issue><fpage>109</fpage><lpage>115</lpage><pub-id pub-id-type="doi">10.1016/j.irbm.2016.03.002</pub-id></nlm-citation></ref><ref id="ref146"><label>146</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rahimian</surname><given-names>M</given-names> </name><name name-style="western"><surname>Warner</surname><given-names>JL</given-names> </name><name name-style="western"><surname>Jain</surname><given-names>SK</given-names> </name><name name-style="western"><surname>Davis</surname><given-names>RB</given-names> </name><name name-style="western"><surname>Zerillo</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Joyce</surname><given-names>RM</given-names> </name></person-group><article-title>Significant and distinctive n-grams in oncology notes: a text-mining method to analyze the effect of OpenNotes on clinical documentation</article-title><source>JCO Clin Cancer Inform</source><year>2019</year><month>06</month><volume>3</volume><fpage>1</fpage><lpage>9</lpage><pub-id pub-id-type="doi">10.1200/CCI.19.00012</pub-id><pub-id pub-id-type="medline">31184919</pub-id></nlm-citation></ref><ref id="ref147"><label>147</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>X</given-names> </name><name name-style="western"><surname>Xie</surname><given-names>H</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>FL</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>J</given-names> </name><name name-style="western"><surname>Hao</surname><given-names>T</given-names> </name></person-group><article-title>A 
bibliometric analysis of natural language processing in medical research</article-title><source>BMC Med Inform Decis Mak</source><year>2018</year><month>03</month><day>22</day><volume>18</volume><issue>Suppl 1</issue><fpage>14</fpage><pub-id pub-id-type="doi">10.1186/s12911-018-0594-x</pub-id><pub-id pub-id-type="medline">29589569</pub-id></nlm-citation></ref><ref id="ref148"><label>148</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Casey</surname><given-names>A</given-names> </name><name name-style="western"><surname>Davidson</surname><given-names>E</given-names> </name><name name-style="western"><surname>Poon</surname><given-names>M</given-names> </name><etal/></person-group><article-title>A systematic review of natural language processing applied to radiology reports</article-title><source>BMC Med Inform Decis Mak</source><year>2021</year><month>06</month><day>3</day><volume>21</volume><issue>1</issue><fpage>179</fpage><pub-id pub-id-type="doi">10.1186/s12911-021-01533-7</pub-id><pub-id pub-id-type="medline">34082729</pub-id></nlm-citation></ref><ref id="ref149"><label>149</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Goff</surname><given-names>DJ</given-names> </name><name name-style="western"><surname>Loehfelm</surname><given-names>TW</given-names> </name></person-group><article-title>Automated radiology report summarization using an open-source natural language processing pipeline</article-title><source>J Digit Imaging</source><year>2018</year><month>04</month><volume>31</volume><issue>2</issue><fpage>185</fpage><lpage>192</lpage><pub-id pub-id-type="doi">10.1007/s10278-017-0030-2</pub-id><pub-id pub-id-type="medline">29086081</pub-id></nlm-citation></ref><ref id="ref150"><label>150</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Dong</surname><given-names>H</given-names> </name><name name-style="western"><surname>Su&#x00E1;rez-Paniagua</surname><given-names>V</given-names> </name><name name-style="western"><surname>Whiteley</surname><given-names>W</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>H</given-names> </name></person-group><article-title>Explainable automated coding of clinical notes using hierarchical label-wise attention networks and label embedding initialisation</article-title><source>J Biomed Inform</source><year>2021</year><month>04</month><volume>116</volume><fpage>103728</fpage><pub-id pub-id-type="doi">10.1016/j.jbi.2021.103728</pub-id><pub-id pub-id-type="medline">33711543</pub-id></nlm-citation></ref><ref id="ref151"><label>151</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Payrovnaziri</surname><given-names>SN</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Rengifo-Moreno</surname><given-names>P</given-names> </name><etal/></person-group><article-title>Explainable artificial intelligence models using real-world electronic health record data: a systematic scoping review</article-title><source>J Am Med Inform Assoc</source><year>2020</year><month>07</month><day>1</day><volume>27</volume><issue>7</issue><fpage>1173</fpage><lpage>1185</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocaa053</pub-id><pub-id pub-id-type="medline">32417928</pub-id></nlm-citation></ref><ref id="ref152"><label>152</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wu</surname><given-names>S</given-names> </name><name name-style="western"><surname>Roberts</surname><given-names>K</given-names> </name><name name-style="western"><surname>Datta</surname><given-names>S</given-names> 
</name><etal/></person-group><article-title>Deep learning in clinical natural language processing: a methodical review</article-title><source>J Am Med Inform Assoc</source><year>2020</year><month>03</month><day>1</day><volume>27</volume><issue>3</issue><fpage>457</fpage><lpage>470</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocz200</pub-id></nlm-citation></ref><ref id="ref153"><label>153</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Manning</surname><given-names>CD</given-names> </name><name name-style="western"><surname>Raghavan</surname><given-names>P</given-names> </name><name name-style="western"><surname>Sch&#x00FC;tze</surname><given-names>H</given-names> </name></person-group><source>Introduction to Information Retrieval</source><year>2008</year><publisher-name>Cambridge University Press</publisher-name><pub-id pub-id-type="other">9780511809071</pub-id></nlm-citation></ref><ref id="ref154"><label>154</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shivade</surname><given-names>C</given-names> </name><name name-style="western"><surname>Raghavan</surname><given-names>P</given-names> </name><name name-style="western"><surname>Fosler-Lussier</surname><given-names>E</given-names> </name><etal/></person-group><article-title>A review of approaches to identifying patient phenotype cohorts using electronic health records</article-title><source>J Am Med Inform Assoc</source><year>2014</year><month>03</month><volume>21</volume><issue>2</issue><fpage>221</fpage><lpage>230</lpage><pub-id pub-id-type="doi">10.1136/amiajnl-2013-001935</pub-id></nlm-citation></ref><ref id="ref155"><label>155</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhu</surname><given-names>M</given-names> </name><name 
name-style="western"><surname>Lin</surname><given-names>H</given-names> </name><name name-style="western"><surname>Jiang</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Large language model trained on clinical oncology data predicts cancer progression</article-title><source>NPJ Digit Med</source><year>2025</year><volume>8</volume><issue>1</issue><fpage>1</fpage><lpage>15</lpage><pub-id pub-id-type="doi">10.1038/s41746-025-01780-2</pub-id></nlm-citation></ref><ref id="ref156"><label>156</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gottlieb</surname><given-names>S</given-names> </name></person-group><article-title>New FDA policies could limit the full value of AI in medicine</article-title><source>JAMA Health Forum</source><year>2025</year><month>02</month><day>7</day><volume>6</volume><issue>2</issue><fpage>e250289</fpage><pub-id pub-id-type="doi">10.1001/jamahealthforum.2025.0289</pub-id><pub-id pub-id-type="medline">39913129</pub-id></nlm-citation></ref><ref id="ref157"><label>157</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Van Laere</surname><given-names>S</given-names> </name><name name-style="western"><surname>Muylle</surname><given-names>KM</given-names> </name><name name-style="western"><surname>Cornu</surname><given-names>P</given-names> </name></person-group><article-title>Clinical decision support and new regulatory frameworks for medical devices: are we ready for it?&#x2014;A viewpoint paper</article-title><source>Int J Health Policy Manag</source><year>2022</year><month>12</month><day>19</day><volume>11</volume><issue>12</issue><fpage>3159</fpage><lpage>3163</lpage><pub-id pub-id-type="doi">10.34172/ijhpm.2021.144</pub-id><pub-id pub-id-type="medline">34814678</pub-id></nlm-citation></ref><ref id="ref158"><label>158</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Artsi</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Sorin</surname><given-names>V</given-names> </name><name name-style="western"><surname>Glicksberg</surname><given-names>BS</given-names> </name><etal/></person-group><article-title>Challenges of implementing LLMs in clinical practice: perspectives</article-title><source>J Clin Med</source><year>2025</year><month>09</month><day>1</day><volume>14</volume><issue>17</issue><fpage>6169</fpage><pub-id pub-id-type="doi">10.3390/jcm14176169</pub-id><pub-id pub-id-type="medline">40943929</pub-id></nlm-citation></ref><ref id="ref159"><label>159</label><nlm-citation citation-type="web"><article-title>General Data Protection Regulation (GDPR)</article-title><source>Intersoft Consulting</source><access-date>2025-01-05</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://gdpr-info.eu/">https://gdpr-info.eu/</ext-link></comment></nlm-citation></ref><ref id="ref160"><label>160</label><nlm-citation citation-type="web"><article-title>Health Insurance Portability and Accountability Act (HIPAA)</article-title><source>US Department of Health and Human Services</source><access-date>2025-01-05</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.hhs.gov/hipaa/index.html">https://www.hhs.gov/hipaa/index.html</ext-link></comment></nlm-citation></ref><ref id="ref161"><label>161</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Moorthie</surname><given-names>S</given-names> </name><name name-style="western"><surname>Hayat</surname><given-names>S</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Rapid systematic review to identify key barriers to access, linkage, and use of local authority 
administrative data for population health research, practice, and policy in the United Kingdom</article-title><source>BMC Public Health</source><year>2022</year><month>06</month><day>28</day><volume>22</volume><issue>1</issue><fpage>1263</fpage><pub-id pub-id-type="doi">10.1186/s12889-022-13187-9</pub-id><pub-id pub-id-type="medline">35764951</pub-id></nlm-citation></ref><ref id="ref162"><label>162</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Alsentzer</surname><given-names>E</given-names> </name></person-group><article-title>Publicly available clinical BERT embeddings</article-title><conf-name>Proceedings of the 2nd Clinical Natural Language Processing Workshop ACL Anthology 2019</conf-name><conf-date>Jun 7, 2019</conf-date><pub-id pub-id-type="doi">10.18653/v1/W19-1909</pub-id></nlm-citation></ref><ref id="ref163"><label>163</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Peng</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Yan</surname><given-names>S</given-names> </name><name name-style="western"><surname>Lu</surname><given-names>Z</given-names> </name></person-group><article-title>Transfer learning in biomedical natural language processing: an evaluation of BERT and ELMo on ten benchmarking datasets</article-title><year>2019</year><conf-name>BioNLP 2019&#x2014;SIGBioMed Workshop on Biomedical Natural Language Processing, Proceedings of the 18th BioNLP Workshop and Shared Task</conf-name><conf-date>Aug 1, 2019</conf-date><conf-loc>Florence, Italy</conf-loc><fpage>58</fpage><lpage>65</lpage><pub-id pub-id-type="doi">10.18653/v1/W19-5006</pub-id></nlm-citation></ref><ref id="ref164"><label>164</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Miranda-Escalada</surname><given-names>A</given-names> </name><name 
name-style="western"><surname>Farr&#x00E9;</surname><given-names>E</given-names> </name><name name-style="western"><surname>Krallinger</surname><given-names>M</given-names> </name></person-group><article-title>Named entity recognition, concept normalization and clinical coding: overview of the Cantemist track for cancer text mining in Spanish, corpus, guidelines, methods and results published online first</article-title><conf-name>Iberian Languages Evaluation Forum 2020</conf-name><conf-date>Sep 22-24, 2020</conf-date><pub-id pub-id-type="doi">10.5281/zenodo.3773228</pub-id></nlm-citation></ref><ref id="ref165"><label>165</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Agrawal</surname><given-names>M</given-names> </name><name name-style="western"><surname>Hegselmann</surname><given-names>S</given-names> </name><name name-style="western"><surname>Lang</surname><given-names>H</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Sontag</surname><given-names>D</given-names> </name></person-group><article-title>Large language models are few-shot clinical information extractors</article-title><conf-name>Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing</conf-name><conf-date>Dec 7-11, 2022</conf-date><pub-id pub-id-type="doi">10.18653/v1/2022.emnlp-main.130</pub-id></nlm-citation></ref><ref id="ref166"><label>166</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Keloth</surname><given-names>VK</given-names> </name><name name-style="western"><surname>Selek</surname><given-names>S</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>Q</given-names> </name><etal/></person-group><article-title>Social determinants of health extraction from clinical notes across institutions 
using large language models</article-title><source>NPJ Digit Med</source><year>2025</year><month>05</month><day>17</day><volume>8</volume><issue>1</issue><fpage>287</fpage><pub-id pub-id-type="doi">10.1038/s41746-025-01645-8</pub-id><pub-id pub-id-type="medline">40379919</pub-id></nlm-citation></ref><ref id="ref167"><label>167</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ferrara</surname><given-names>E</given-names> </name></person-group><article-title>Should ChatGPT be biased? Challenges and risks of bias in large language models</article-title><source>First Monday</source><year>2023</year><pub-id pub-id-type="doi">10.5210/fm.v28i11.13346</pub-id></nlm-citation></ref><ref id="ref168"><label>168</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Thirunavukarasu</surname><given-names>AJ</given-names> </name><name name-style="western"><surname>Ting</surname><given-names>DSJ</given-names> </name><name name-style="western"><surname>Elangovan</surname><given-names>K</given-names> </name><name name-style="western"><surname>Gutierrez</surname><given-names>L</given-names> </name><name name-style="western"><surname>Tan</surname><given-names>TF</given-names> </name><name name-style="western"><surname>Ting</surname><given-names>DSW</given-names> </name></person-group><article-title>Large language models in medicine</article-title><source>Nat Med</source><year>2023</year><month>08</month><volume>29</volume><issue>8</issue><fpage>1930</fpage><lpage>1940</lpage><pub-id pub-id-type="doi">10.1038/s41591-023-02448-8</pub-id><pub-id pub-id-type="medline">37460753</pub-id></nlm-citation></ref><ref id="ref169"><label>169</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Thirunavukarasu</surname><given-names>AJ</given-names> </name><name 
name-style="western"><surname>Hassan</surname><given-names>R</given-names> </name><name name-style="western"><surname>Mahmood</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Trialling a large language model (ChatGPT) in general practice with the applied knowledge test: observational study demonstrating opportunities and limitations in primary care</article-title><source>JMIR Med Educ</source><year>2023</year><month>04</month><day>21</day><volume>9</volume><fpage>e46599</fpage><pub-id pub-id-type="doi">10.2196/46599</pub-id><pub-id pub-id-type="medline">37083633</pub-id></nlm-citation></ref><ref id="ref170"><label>170</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kung</surname><given-names>TH</given-names> </name><name name-style="western"><surname>Cheatham</surname><given-names>M</given-names> </name><name name-style="western"><surname>Medenilla</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Performance of ChatGPT on USMLE: potential for AI-assisted medical education using large language models</article-title><source>PLOS Digit Health</source><year>2023</year><month>02</month><volume>2</volume><issue>2</issue><fpage>e0000198</fpage><pub-id pub-id-type="doi">10.1371/journal.pdig.0000198</pub-id><pub-id pub-id-type="medline">36812645</pub-id></nlm-citation></ref><ref id="ref171"><label>171</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Lin</surname><given-names>BY</given-names> </name><name name-style="western"><surname>He</surname><given-names>C</given-names> </name><name name-style="western"><surname>Ze</surname><given-names>Z</given-names> </name><etal/></person-group><article-title>FedNLP: benchmarking federated learning methods for natural language processing tasks</article-title><conf-name>Findings of the Association for Computational 
Linguistics: NAACL 2022</conf-name><conf-date>Jul 10-15, 2022</conf-date><pub-id pub-id-type="doi">10.18653/v1/2022.findings-naacl.13</pub-id></nlm-citation></ref><ref id="ref172"><label>172</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Mohan</surname><given-names>K</given-names> </name></person-group><article-title>A study on performance limitations in federated learning</article-title><comment>Preprint posted online on Jan 7, 2025</comment><pub-id pub-id-type="doi">10.48550/arXiv.2501.03477</pub-id></nlm-citation></ref><ref id="ref173"><label>173</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Xu</surname><given-names>C</given-names> </name><name name-style="western"><surname>Qu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Xiang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Gao</surname><given-names>L</given-names> </name></person-group><article-title>Asynchronous federated learning on heterogeneous devices: a survey</article-title><source>Comput Sci Rev</source><year>2023</year><month>11</month><volume>50</volume><fpage>100595</fpage><pub-id pub-id-type="doi">10.1016/j.cosrev.2023.100595</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Search criteria.</p><media xlink:href="ai_v5i1e73481_app1.docx" xlink:title="DOCX File, 16 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Studies included in the review and variables extracted.</p><media xlink:href="ai_v5i1e73481_app2.xlsx" xlink:title="XLSX File, 69 KB"/></supplementary-material><supplementary-material id="app3"><label>Multimedia Appendix 3</label><p>Models for non-English corpora.</p><media xlink:href="ai_v5i1e73481_app3.png" xlink:title="PNG File, 125 
KB"/></supplementary-material><supplementary-material id="app4"><label>Multimedia Appendix 4</label><p>Annotation methods for reference corpus. Annotation granularity ranged from the entity or concept level to the patient level, including sentence, document section, and document levels. No information: no description of annotation methods (studies that used existing tools, detailed methods described elsewhere).</p><media xlink:href="ai_v5i1e73481_app4.png" xlink:title="PNG File, 76 KB"/></supplementary-material><supplementary-material id="app5"><label>Checklist 1</label><p>PRISMA-ScR checklist.</p><media xlink:href="ai_v5i1e73481_app5.pdf" xlink:title="PDF File, 184 KB"/></supplementary-material></app-group></back></article>