<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR AI</journal-id><journal-id journal-id-type="publisher-id">ai</journal-id><journal-id journal-id-type="index">41</journal-id><journal-title>JMIR AI</journal-title><abbrev-journal-title>JMIR AI</abbrev-journal-title><issn pub-type="epub">2817-1705</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v4i1e57828</article-id><article-id pub-id-type="doi">10.2196/57828</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Harnessing Moderate-Sized Language Models for Reliable Patient Data Deidentification in Emergency Department Records: Algorithm Development, Validation, and Implementation Study</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Dor&#x00E9;mus</surname><given-names>Oc&#x00E9;ane</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Russon</surname><given-names>Dylan</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Contrand</surname><given-names>Benjamin</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Guerra-Adames</surname><given-names>Ariel</given-names></name><degrees>BEng, MSc</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Avalos-Fernandez</surname><given-names>Marta</given-names></name><degrees>HDR, PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Gil-Jardin&#x00E9;</surname><given-names>C&#x00E9;dric</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Lagarde</surname><given-names>Emmanuel</given-names></name><degrees>HDR, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib></contrib-group><aff id="aff1"><institution>AHeaD Team, University of Bordeaux, INSERM, BPH, U1219</institution><addr-line>146 Rue L&#x00E9;o Saignat</addr-line><addr-line>Bordeaux</addr-line><country>France</country></aff><aff id="aff2"><institution>SISTM Team, University of Bordeaux, INSERM, INRIA, BPH, U1219</institution><addr-line>Bordeaux</addr-line><country>France</country></aff><aff id="aff3"><institution>Department of Emergency Medicine, Bordeaux University Hospital</institution><addr-line>Bordeaux</addr-line><country>France</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Sun</surname><given-names>Jimeng</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Vashishtha</surname><given-names>Ela</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Gupta</surname><given-names>Gaurav Kumar</given-names></name></contrib><contrib contrib-type="reviewer"><name 
name-style="western"><surname>Elbattah</surname><given-names>Mahmoud</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Oc&#x00E9;ane Dor&#x00E9;mus, MSc, AHeaD Team, University of Bordeaux, INSERM, BPH, U1219, 146 Rue L&#x00E9;o Saignat, Bordeaux, F-33000, France, 33 5 57 57 15 04; <email>oceane.doremus@u-bordeaux.fr</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>1</day><month>4</month><year>2025</year></pub-date><volume>4</volume><elocation-id>e57828</elocation-id><history><date date-type="received"><day>28</day><month>02</month><year>2024</year></date><date date-type="rev-recd"><day>28</day><month>08</month><year>2024</year></date><date date-type="accepted"><day>23</day><month>10</month><year>2024</year></date></history><copyright-statement>&#x00A9; Oc&#x00E9;ane Dor&#x00E9;mus, Dylan Russon, Benjamin Contrand, Ariel Guerra-Adames, Marta Avalos-Fernandez, C&#x00E9;dric Gil-Jardin&#x00E9;, Emmanuel Lagarde. Originally published in JMIR AI (<ext-link ext-link-type="uri" xlink:href="https://ai.jmir.org">https://ai.jmir.org</ext-link>), 1.4.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR AI, is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://www.ai.jmir.org/">https://www.ai.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://ai.jmir.org/2025/1/e57828"/><abstract><sec><title>Background</title><p>The digitization of health care, facilitated by the adoption of electronic health records systems, has revolutionized data-driven medical research and patient care. While this digital transformation offers substantial benefits in health care efficiency and accessibility, it concurrently raises significant concerns over privacy and data security. Initially, the journey toward protecting patient data saw the transition from rule-based systems to more mixed approaches including machine learning for deidentifying patient data. Subsequently, the emergence of large language models has represented a further opportunity in this domain, offering unparalleled potential for enhancing the accuracy of context-sensitive deidentification. However, despite large language models offering significant potential, the deployment of the most advanced models in hospital environments is frequently hindered by data security issues and the extensive hardware resources required.</p></sec><sec><title>Objective</title><p>The objective of our study is to design, implement, and evaluate deidentification algorithms using fine-tuned moderate-sized open-source language models, ensuring their suitability for production inference tasks on personal computers.</p></sec><sec sec-type="methods"><title>Methods</title><p>We aimed to replace personal identifying information (PII) with generic placeholders or label non-PII texts as &#x201C;ANONYMOUS,&#x201D; ensuring privacy while preserving textual integrity. 
Our dataset, derived from over 425,000 clinical notes from the adult emergency department of the Bordeaux University Hospital in France, underwent independent double annotation by 2 experts to create a reference for model validation with 3000 clinical notes randomly selected. Three open-source language models of manageable size were selected for their feasibility in hospital settings: Llama 2 (Meta) 7B, Mistral 7B, and Mixtral 8&#x00D7;7B (Mistral AI). Fine-tuning used the quantized low-rank adaptation technique. Evaluation focused on PII-level (recall, precision, and <italic>F</italic><sub>1</sub>-score) and clinical note-level metrics (recall and BLEU [bilingual evaluation understudy] metric), assessing deidentification effectiveness and content preservation.</p></sec><sec sec-type="results"><title>Results</title><p>The generative model Mistral 7B performed the highest with an overall <italic>F</italic><sub>1</sub>-score of 0.9673 (vs 0.8750 for Llama 2 and 0.8686 for Mixtral 8&#x00D7;7B). At the clinical notes level, the model&#x2019;s overall recall was 0.9326 (vs 0.6888 for Llama 2 and 0.6417 for Mixtral 8&#x00D7;7B). This rate increased to 0.9915 when Mistral 7B only deleted names. Four notes of 3000 failed to be fully pseudonymized for names: in 1 case, the nondeleted name belonged to a patient, while in the others, it belonged to medical staff. Beyond the fifth epoch, the BLEU score consistently exceeded 0.9864, indicating no significant text alteration.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>Our research underscores the significant capabilities of generative natural language processing models, with Mistral 7B standing out for its superior ability to deidentify clinical texts efficiently. Achieving notable performance metrics, Mistral 7B operates effectively without requiring high-end computational resources. 
These methods pave the way for a broader availability of pseudonymized clinical texts, enabling their use for research purposes and the optimization of the health care system.</p></sec></abstract><kwd-group><kwd>de-identification</kwd><kwd>machine learning</kwd><kwd>large language model</kwd><kwd>natural language processing</kwd><kwd>electronic health records</kwd><kwd>transformers</kwd><kwd>general data protection regulation</kwd><kwd>clinical notes</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>The digitization of medical data has profoundly transformed health care, facilitating the easy and efficient sharing of patient information [<xref ref-type="bibr" rid="ref1">1</xref>]. This digital transition, embodied by electronic health records systems, offers promising opportunities for data-driven solutions, research, and surveillance on a pan-European scale [<xref ref-type="bibr" rid="ref2">2</xref>]. Yet, alongside the many advantages of digitization come significant concerns about the privacy and security of sensitive patient data [<xref ref-type="bibr" rid="ref3">3</xref>]. The European General Data Protection Regulation emphasizes the necessity of stringent data protection measures, particularly for health-related information [<xref ref-type="bibr" rid="ref2">2</xref>]. Clinical notes, which often encompass identifiable patient details, must adhere to these standards to safeguard patient confidentiality [Loi Informatique et Libert&#x00E9;s]. Before any data sharing, researchers face the critical task of developing and integrating methods that mask sensitive data, guaranteeing protection against any unauthorized access [<xref ref-type="bibr" rid="ref4">4</xref>]. 
Our team was recently faced with this challenge in a project aimed at classifying clinical notes from emergency services to extract the necessary information for the establishment of a trauma observatory [<xref ref-type="bibr" rid="ref5">5</xref>].</p><p>Manual deidentification of medical records is not feasible, as it is expensive in terms of personnel resources and the time required to accomplish the task. Alternatively, multiple strategies have been implemented for the automated deidentification of medical records [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. These methods evolved from systems based on explicit rules, regular expressions or dictionaries [<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref16">16</xref>], to techniques using machine learning [<xref ref-type="bibr" rid="ref17">17</xref>-<xref ref-type="bibr" rid="ref19">19</xref>].</p><p>In recent years, the evolution of language models, particularly those based on transformer architectures, has reshaped the landscape of natural language processing (NLP). Transformers, introduced by Vaswani et al [<xref ref-type="bibr" rid="ref20">20</xref>] in 2017, provided a novel approach to handling sequential data using self-attention mechanisms, thereby obviating the need for recurrent layers and significantly augmenting training efficiency. This pivotal innovation paved the way for the advent of progressively sophisticated and expansive models. Transformer-based language models of a moderate scale, particularly through customized and fine-tuned versions of the architecture BERT [<xref ref-type="bibr" rid="ref21">21</xref>], have demonstrated high capabilities in various health care applications. These models excel in understanding and processing complex clinical texts, enabling tasks such as predicting patient outcomes and identifying medical events. 
For instance, a recent study highlighted the effectiveness of fine-tuned BERT models in analyzing clinical notes to predict occurrences of falls, showcasing the model&#x2019;s ability to comprehend subtle nuances in medical language [<xref ref-type="bibr" rid="ref22">22</xref>]. Additionally, BERT models offer significant benefits for tasks such as named entity recognition (NER). Those models offer notable benefits for deidentification, thanks to their capacity to discern patterns among words and phrases. Their ability to learn from diverse text types means they can effectively tackle various pseudonymization challenges, as they can be trained to erase a wide range of identifiable details across different document types.</p><p>The burgeoning of computational resources and datasets has since kindled a shift toward the construction of massive models, embedded with trillions of parameters [<xref ref-type="bibr" rid="ref23">23</xref>-<xref ref-type="bibr" rid="ref25">25</xref>]. As they grew in size, their generalization aptitude and versatility witnessed substantial enhancement, optimizing tasks such as deidentification. In 2023, Liu et al [<xref ref-type="bibr" rid="ref25">25</xref>] underscored the potential of leveraging GPT-4&#x2019;s inherent capacity for 0-shot in-context learning. A salient highlight of their methodology was its ability to maintain the original structure and meaning of the text after the removal of confidential details. While the capabilities of GPT-4 are undeniable, its application in the realm of health care presents serious ethical and legal dilemmas, primarily concerning data privacy and patient confidentiality. On the one hand, due to the vastness of the model, local hosting of GPT-4 is not feasible; therefore, data would have to be transmitted to external servers, in this case OpenAI&#x2019;s infrastructure. On the other hand, considering the confidentiality of the weights, only locally hosted servers are regulatory compliant. 
Furthermore, considering that GPT-4 is a proprietary model, organizations cannot fully control or audit the underlying mechanics or data handling processes.</p><p>From a regulatory perspective, sending personal health information externally contravenes many data protection regulations, most notably the General Data Protection Regulation in Europe and the Health Insurance Portability and Accountability Act [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>] in the United States. This not only raises data sovereignty issues but also infringes on patient rights, as patients might not have explicitly consented for their data to be processed in external environments. Hence, while the technological feats of models such as GPT-4 are commendable, their real-world applications, especially in sensitive sectors such as health care, require careful consideration and, possibly, significant adjustments to ensure full regulatory compliance and ethical integrity.</p><p>Generative language models significantly smaller in size (several billion parameters compared to over a trillion for GPT-4) have been recently developed and made available to the public under licenses that allow for almost unrestricted use (Llama 2 by Meta [<xref ref-type="bibr" rid="ref28">28</xref>]) or even under open-source terms (Mistral [<xref ref-type="bibr" rid="ref29">29</xref>]).</p><p>The objective of our study is to design, implement, and evaluate deidentification methods involving proper prompt engineering and fine-tuning of 3 open-source language models (Llama 2 7B, Mistral 7B, and Mixtral 8&#x00D7;7B [<xref ref-type="bibr" rid="ref30">30</xref>]). 
These models were selected for their moderate size, making them suitable for deployment on personal computers for production inference tasks.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Study Design</title><p>We first attempted to perform the task using only prompt engineering and 0-shot inference. As we failed to achieve any significant results, we improved the selected models&#x2019; capability to deidentify clinical texts using quantized low-rank adaptation [<xref ref-type="bibr" rid="ref31">31</xref>] fine-tuning with a dataset of instruction or response pairs. In practice, the task consists in replacing personal identifying information (PII; name, location, dates, telephone number, email, or identification numbers) with generic placeholders, represented as &#x201C;[XXXXX],&#x201D; or, when no PII is detected, by generating the text as &#x201C;ANONYMOUS.&#x201D; The ultimate goal of this procedure is to preserve text content, ensuring adherence to privacy and confidentiality requirements.</p></sec><sec id="s2-2"><title>Data Source, Datasets Allocations, and Annotation</title><p>Within the emergency department, triage is conducted by triage nurses. This process involves the collection of information on each patient, including medical history, current symptoms, vital signs, and personal details. It is these data that we have at our disposal in our study. For this investigation, we curated our dataset from a repository containing 425,680 clinical free-text notes (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>), authored by a nurse during the initial reception and triage of individuals at the Bordeaux University Hospital&#x2019;s adult emergency department over the period spanning from January 2013 to December 2022. A subset of 6097 clinical notes was randomly selected and independently annotated by 2 experts. 
Any arising discrepancies were adjudicated by a third expert, thus establishing a reference database. From this curated sample of 6097 clinical notes, 3000 were delineated to constitute a test dataset, upon which accuracy metrics were evaluated (<xref ref-type="fig" rid="figure1">Figure 1</xref>). The residual 3097 clinical notes, alongside an additional sample of 3000 clinical notes designed using filters and keyword searches to encompass a broad spectrum of identifying scenarios, comprised the validation dataset.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Data preparation: annotation and splitting into training and test sets.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v4i1e57828_fig01.png"/></fig><p>In order to further assess whether the deidentification performance of the models varies with the type of PII, we classified identifying information within clinical notes into 6 distinct categories (<xref ref-type="table" rid="table1">Table 1</xref>). These categories were used by annotators to label such information in the test dataset. While we have taken care to remove obvious PII such as names, addresses, and identification numbers, it is important to note that deidentification cannot be considered as a strict anonymization process. For instance, in cases of rare diseases or very specific descriptions, reidentification could theoretically be possible. As every clinical history is unique, ensuring complete anonymity is unattainable. 
Our goal is to pseudonymize data, striking a balance between patient confidentiality and data utility for research, as removing all sensitive information will significantly diminish the data&#x2019;s usefulness.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Personal identifying information categories description in medical records.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Type</td><td align="left" valign="bottom">Code</td><td align="left" valign="bottom">Description</td></tr></thead><tbody><tr><td align="left" valign="top">Individual names</td><td align="left" valign="top">NAME</td><td align="left" valign="top">Includes both first and last names of individuals (including patients and medical staff) or of relatives, employers, or household members of the individuals, ensuring personal identification.</td></tr><tr><td align="left" valign="top">Dates</td><td align="left" valign="top">DATE</td><td align="left" valign="top">Pertains to specific dates related to medical events, appointments, or personal milestones, formatted as day, month, or year.</td></tr><tr><td align="left" valign="top">Geographic identifiers</td><td align="left" valign="top">LOC<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup></td><td align="left" valign="top">Covers names of geographic locations such as cities, medical facilities, or addresses, facilitating location-based identification.</td></tr><tr><td align="left" valign="top">Phone numbers</td><td align="left" valign="top">TEL<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup></td><td align="left" valign="top">Comprises all forms of telephone numbers for direct contact, including mobile and landline numbers.</td></tr><tr><td align="left" valign="top">Email addresses</td><td align="left" valign="top">MAIL</td><td align="left" valign="top">Encompasses electronic mail addresses, allowing for digital communication.</td></tr><tr><td 
align="left" valign="top">Miscellaneous identifiers</td><td align="left" valign="top">OTHER</td><td align="left" valign="top">A catch-all category for unique identifiers not covered by other categories, including social security numbers, medical analysis codes, and URLs for patient images.</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>LOC: location.</p></fn><fn id="table1fn2"><p><sup>b</sup>TEL: telephone.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s2-3"><title>Selected Models</title><p>We have selected 3 language models that share the following 2 characteristics: being open-source and of sufficiently small size for the production phase to be implemented on affordable PC-type systems. These are Llama 2 7B, Mistral 7B, and Mixtral 8&#x00D7;7B. Llama 2 7B is developed by Meta. Launched in 2023, this is a 7-billion-parameter model, which is claimed to exhibit a good balance between performance and efficiency. We also selected the Mistral 7B model, introduced to the public in October 2023. It has demonstrated superior performance, either matching or surpassing that of Llama 2 13B in extensive benchmarks and showing comparable results to Llama 1 34B in specific domains such as reasoning, mathematics, and code generation. In December 2023, the Mixtral 8&#x00D7;7B model was released. It is described as a Sparse Mixture of Experts language model. Its key innovation lies in the routing of inference tasks through 1 selected expert out of 8, enabled by an additional routing layer. Consequently, despite its 8&#x00D7;7B size with respect to fine-tuning, Mixtral achieves significant efficiency by requiring an eightfold reduction in parameters for inference tasks.</p></sec><sec id="s2-4"><title>Fine-Tuning and Inference</title><p>Each model was subjected to the same prompt or response pairs of clinical notes. 
The fine-tuning process was uniformly standardized across all 3 models, albeit with variations in batch sizes and quantization rates to accommodate our hardware constraints. The fine-tuning configuration for Mistral 7B and Llama 2 7B involved a batch size of 24 records per GPU, while Mixtral used a batch size of 20. The models were fine-tuned over 15 epochs, using the AdamW optimizer [<xref ref-type="bibr" rid="ref32">32</xref>] with a learning rate of 5e-5 and a weight decay of 0.01. We used the quantized low-rank adaptation technique, allowing for specific adjustments in selected parts of the model, such as query, key, value, output, and gates projection modules while preserving the overall architecture integrity. The low-rank adaptation configuration included a rank setting of 32, a learning rate multiplier (alpha) set to 64, with a dropout of 0.1, and without any bias setting. Additionally, to optimize computational efficiency and minimize memory consumption, the models were quantized to 8-bit precision for both 7B models, and 4-bit precision for Mixtral. At every fine-tuning epoch, the inference was induced for each model.</p><p>The computational undertakings of this research were performed on a server running Ubuntu (version 22.04; Canonical Ltd), outfitted with 4 A100 GPUs, collectively boasting 320GB of VRAM.</p></sec><sec id="s2-5"><title>Evaluation</title><sec id="s2-5-1"><title>Overview</title><p>In evaluating the deidentification performance of personal data within clinical notes, our analysis is structured around 2 primary methodologies. The first methodology operates at the PII-level, enabling us to provide estimates of recall, precision, and <italic>F</italic><sub>1</sub>-scores that are comparable with previous work in the literature. The second methodology focuses on clinical notes as the statistical unit, enabling us to assess the variation in recall performance according to the category of PII. 
This latter approach needs to be complemented by the measurement of a BLEU (bilingual evaluation understudy) score to assess potential modifications in the text. The assessment of the number of successful deidentifications was conducted through a comparison with the manually annotated test dataset.</p></sec><sec id="s2-5-2"><title>PII-Based Metrics</title><p>This approach centers on treating each PII as an independent statistical unit. This perspective allows us to gauge the precision and recall of our deidentification efforts at the most granular level. Recall in this context is conceptualized as the proportion of PIIs accurately identified and removed from the clinical notes.</p><disp-formula id="E1"><mml:math id="eqn1"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi>R</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:mi>l</mml:mi><mml:mspace width="0.1cm"/><mml:mi>P</mml:mi><mml:mi>I</mml:mi><mml:mi>I</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>n</mml:mi><mml:mi>u</mml:mi><mml:mi>m</mml:mi><mml:mi>b</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mspace width="thinmathspace"/><mml:mi>o</mml:mi><mml:mi>f</mml:mi><mml:mspace width="thinmathspace"/><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>t</mml:mi><mml:mi>l</mml:mi><mml:mi>y</mml:mi><mml:mspace width="thinmathspace"/><mml:mi>d</mml:mi><mml:mi>e</mml:mi><mml:mi>i</mml:mi><mml:mi>d</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mi>t</mml:mi><mml:mi>i</mml:mi><mml:mi>f</mml:mi><mml:mi>i</mml:mi><mml:mi>e</mml:mi><mml:mi>d</mml:mi><mml:mspace width="thinmathspace"/><mml:mi>P</mml:mi><mml:mi>I</mml:mi><mml:mi>I</mml:mi><mml:mspace width="thinmathspace"/><mml:mi>p</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mspace 
width="thinmathspace"/><mml:mi>c</mml:mi><mml:mi>l</mml:mi><mml:mi>i</mml:mi><mml:mi>n</mml:mi><mml:mi>i</mml:mi><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:mspace width="thinmathspace"/><mml:mi>n</mml:mi><mml:mi>o</mml:mi><mml:mi>t</mml:mi><mml:mi>e</mml:mi><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mi>o</mml:mi><mml:mi>t</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:mspace width="thinmathspace"/><mml:mi>n</mml:mi><mml:mi>u</mml:mi><mml:mi>m</mml:mi><mml:mi>b</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mspace width="thinmathspace"/><mml:mi>o</mml:mi><mml:mi>f</mml:mi><mml:mspace width="thinmathspace"/><mml:mi>P</mml:mi><mml:mi>I</mml:mi><mml:mi>I</mml:mi><mml:mspace width="thinmathspace"/><mml:mi>p</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mspace width="thinmathspace"/><mml:mi>c</mml:mi><mml:mi>l</mml:mi><mml:mi>i</mml:mi><mml:mi>n</mml:mi><mml:mi>i</mml:mi><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:mspace width="thinmathspace"/><mml:mi>n</mml:mi><mml:mi>o</mml:mi><mml:mi>t</mml:mi><mml:mi>e</mml:mi><mml:mi>s</mml:mi></mml:mrow></mml:mfrac></mml:mrow></mml:mstyle></mml:math></disp-formula><p>Precision, meanwhile, reflects the accuracy of our model in identifying and eliminating actual PIIs, distinguishing between correct identifications and false positives.</p><disp-formula id="E2"><mml:math id="eqn2"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mspace width="0.1cm"/><mml:mi>P</mml:mi><mml:mi>I</mml:mi><mml:mi>I</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>n</mml:mi><mml:mi>u</mml:mi><mml:mi>m</mml:mi><mml:mi>b</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mspace width="thinmathspace"/><mml:mi>o</mml:mi><mml:mi>f</mml:mi><mml:mspace 
width="thinmathspace"/><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>t</mml:mi><mml:mi>l</mml:mi><mml:mi>y</mml:mi><mml:mspace width="thinmathspace"/><mml:mi>d</mml:mi><mml:mi>e</mml:mi><mml:mi>i</mml:mi><mml:mi>d</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mi>t</mml:mi><mml:mi>i</mml:mi><mml:mi>f</mml:mi><mml:mi>i</mml:mi><mml:mi>e</mml:mi><mml:mi>d</mml:mi><mml:mspace width="thinmathspace"/><mml:mi>P</mml:mi><mml:mi>I</mml:mi><mml:mi>I</mml:mi><mml:mspace width="thinmathspace"/><mml:mi>p</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mspace width="thinmathspace"/><mml:mi>c</mml:mi><mml:mi>l</mml:mi><mml:mi>i</mml:mi><mml:mi>n</mml:mi><mml:mi>i</mml:mi><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:mspace width="thinmathspace"/><mml:mi>n</mml:mi><mml:mi>o</mml:mi><mml:mi>t</mml:mi><mml:mi>e</mml:mi><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mi>o</mml:mi><mml:mi>t</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:mspace width="thinmathspace"/><mml:mi>n</mml:mi><mml:mi>u</mml:mi><mml:mi>m</mml:mi><mml:mi>b</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mspace width="thinmathspace"/><mml:mi>o</mml:mi><mml:mi>f</mml:mi><mml:mspace width="thinmathspace"/><mml:mi>P</mml:mi><mml:mi>I</mml:mi><mml:mi>I</mml:mi><mml:mspace width="thinmathspace"/><mml:mi>t</mml:mi><mml:mi>a</mml:mi><mml:mi>g</mml:mi><mml:mi>g</mml:mi><mml:mi>e</mml:mi><mml:mi>d</mml:mi></mml:mrow></mml:mfrac></mml:mrow></mml:mstyle></mml:math></disp-formula><p>The summary <italic>F</italic><sub>1</sub>-score measure is:</p><disp-formula id="E3"><mml:math id="eqn3"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mi>F</mml:mi><mml:mrow><mml:mstyle 
mathsize="0.7em"><mml:mn>1</mml:mn></mml:mstyle></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:mi>s</mml:mi><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mn>2</mml:mn><mml:mrow><mml:mfrac><mml:mn>1</mml:mn><mml:mtext>precision</mml:mtext></mml:mfrac><mml:mo>+</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mtext>recall</mml:mtext></mml:mfrac></mml:mrow></mml:mfrac></mml:mrow></mml:mstyle></mml:math></disp-formula></sec><sec id="s2-5-3"><title>Clinical Note&#x2013;Based Metrics</title><p>The second approach adopts the entire clinical note as the statistical unit of analysis. Here we evaluate the success of deidentification on a document-wide scale, marking a &#x201C;success&#x201D; when every PII within a note has been successfully deidentified. Such a measure offers insight into the overall effectiveness of our deidentification protocols. Recall, in this instance, measures the ratio of fully deidentified notes to those containing any PII.</p><disp-formula id="E4"><mml:math id="eqn4"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi>R</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:mi>l</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>n</mml:mi><mml:mi>u</mml:mi><mml:mi>m</mml:mi><mml:mi>b</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mspace width="thinmathspace"/><mml:mi>o</mml:mi><mml:mi>f</mml:mi><mml:mspace width="thinmathspace"/><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>t</mml:mi><mml:mi>l</mml:mi><mml:mi>y</mml:mi><mml:mspace width="thinmathspace"/><mml:mi>d</mml:mi><mml:mi>e</mml:mi><mml:mi>i</mml:mi><mml:mi>d</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mi>t</mml:mi><mml:mi>i</mml:mi><mml:mi>f</mml:mi><mml:mi>i</mml:mi><mml:mi>e</mml:mi><mml:mi>d</mml:mi><mml:mspace 
width="thinmathspace"/><mml:mi>c</mml:mi><mml:mi>l</mml:mi><mml:mi>i</mml:mi><mml:mi>n</mml:mi><mml:mi>i</mml:mi><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:mspace width="thinmathspace"/><mml:mi>n</mml:mi><mml:mi>o</mml:mi><mml:mi>t</mml:mi><mml:mi>e</mml:mi><mml:mi>s</mml:mi><mml:mspace width="thinmathspace"/><mml:mi>a</mml:mi><mml:mi>m</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mi>g</mml:mi><mml:mspace width="thinmathspace"/><mml:mi>i</mml:mi><mml:mi>d</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mi>t</mml:mi><mml:mi>i</mml:mi><mml:mi>f</mml:mi><mml:mi>y</mml:mi><mml:mi>i</mml:mi><mml:mi>n</mml:mi><mml:mi>g</mml:mi><mml:mspace width="thinmathspace"/><mml:mi>c</mml:mi><mml:mi>l</mml:mi><mml:mi>i</mml:mi><mml:mi>n</mml:mi><mml:mi>i</mml:mi><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:mspace width="thinmathspace"/><mml:mi>n</mml:mi><mml:mi>o</mml:mi><mml:mi>t</mml:mi><mml:mi>e</mml:mi><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mi>o</mml:mi><mml:mi>t</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:mspace width="thinmathspace"/><mml:mi>n</mml:mi><mml:mi>u</mml:mi><mml:mi>m</mml:mi><mml:mi>b</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mspace width="thinmathspace"/><mml:mi>o</mml:mi><mml:mi>f</mml:mi><mml:mspace width="thinmathspace"/><mml:mi>i</mml:mi><mml:mi>d</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mi>t</mml:mi><mml:mi>i</mml:mi><mml:mi>f</mml:mi><mml:mi>y</mml:mi><mml:mi>i</mml:mi><mml:mi>n</mml:mi><mml:mi>g</mml:mi><mml:mspace width="thinmathspace"/><mml:mi>c</mml:mi><mml:mi>l</mml:mi><mml:mi>i</mml:mi><mml:mi>n</mml:mi><mml:mi>i</mml:mi><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:mspace width="thinmathspace"/><mml:mi>n</mml:mi><mml:mi>o</mml:mi><mml:mi>t</mml:mi><mml:mi>e</mml:mi><mml:mi>s</mml:mi></mml:mrow></mml:mfrac></mml:mrow></mml:mstyle></mml:math></disp-formula><p>Because the clinical notes in the validation set are annotated by indicating the nature of the PII 
(according to the categories in <xref ref-type="table" rid="table1">Table 1</xref>), it is possible to detail the variations in recall by category. The relevance of precision is altered in this context, as it necessitates a different consideration of what constitutes a pseudonymization attempt, denoted by the presence of a pseudonymization tag. Instead, the potential alteration of content possibly induced by the deidentification process was measured using the BLEU score [<xref ref-type="bibr" rid="ref33">33</xref>].</p><disp-formula id="E5"><mml:math id="eqn5"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mi>B</mml:mi><mml:mi>L</mml:mi><mml:mi>E</mml:mi><mml:mi>U</mml:mi><mml:mo>=</mml:mo><mml:mi>B</mml:mi><mml:mi>P</mml:mi><mml:mo>&#x22C5;</mml:mo><mml:mrow><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">x</mml:mi><mml:mi mathvariant="normal">p</mml:mi></mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mo>&#x2211;</mml:mo><mml:mrow/><mml:msub><mml:mi>w</mml:mi><mml:mrow><mml:mi mathvariant="normal">n</mml:mi></mml:mrow></mml:msub><mml:mspace width="thinmathspace"/><mml:mi>l</mml:mi><mml:mi>o</mml:mi><mml:mi>g</mml:mi><mml:mspace width="thinmathspace"/><mml:msub><mml:mi>p</mml:mi><mml:mrow><mml:mi mathvariant="normal">n</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where BP is the brevity penalty, w<sub>n</sub> the weight for each n-gram, and p<sub>n</sub> the precision of n-grams. 
We set a value of 4 for the BLEU score calculation, aligning with common practice in NLP to capture up to 4-gram coherence, thereby ensuring a comprehensive evaluation of content preservation.</p></sec></sec><sec id="s2-6"><title>Ethical Considerations</title><sec id="s2-6-1"><title>Overview</title><p>This study was conducted as part of the Automated Processing of Emergency Department Visit Summaries for a National Observatory project, which aims to automate the processing of emergency department visit summaries for national observation purposes.</p><p>The study received the following regulatory approvals: (1) the Ethics Committee for Research in Science and Health, validating the compliance of the protocol with current ethical requirements; and (2) the National Commission on Informatics and Liberty, under decision DR-2022-235 (authorization request 922170), allowing the processing of data for this study.</p></sec><sec id="s2-6-2"><title>Confidentiality and Data Protection</title><p>The data processing was carried out exclusively on a secure local server, specially dedicated to this purpose. This server meets the current security standards, ensuring the confidentiality, integrity, and protection of the processed information. 
All necessary technical and organizational measures have been implemented to prevent unauthorized access to the data and to ensure strict compliance with regulatory requirements.</p></sec><sec id="s2-6-3"><title>Compensation</title><p>Since this study relies solely on the analysis of pre-existing medical data and does not require direct patient involvement, no financial compensation was provided.</p></sec></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Data Overview</title><p>Very few notes contained PIIs categorized as email addresses and &#x201C;other.&#x201D; These categories are included in the training sample due to an ad hoc selection process, which used filters to ensure representation, as half of the set was selected this way. Our examination of the test sample, which consists entirely of randomly selected clinical notes, reveals that names, places, and dates are the most prevalent types of PII. The categories of identifying data in the training and test sets are summarized in <xref ref-type="table" rid="table2">Table 2</xref>.</p><p>Regarding the length of clinical notes, they range from 8 to 3916 characters (with an average of 443, SD 289 characters) in the training set and from 3 to 2138 characters (averaging 439, SD 283 characters) in the test set. 
A total of 935 (31.2%) clinical notes in the test set contain at least one PII.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Enhanced distribution of PII<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup> in training and test sets.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom"/><td align="left" valign="bottom">Train set</td><td align="left" valign="bottom">Test set</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="3"><bold>Clinical notes</bold></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Nonanonymous medical notes, n (%)</td><td align="left" valign="top">3442 (56.5)</td><td align="left" valign="top">935 (31.2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Randomly selected medical notes, n</td><td align="left" valign="top">3097</td><td align="left" valign="top">3000</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Ad hoc selected medical notes, n</td><td align="left" valign="top">3000</td><td align="left" valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Total count, n</td><td align="left" valign="top">6097</td><td align="left" valign="top">3000</td></tr><tr><td align="left" valign="top" colspan="3"><bold>PII categories, n</bold></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>NAME</td><td align="left" valign="top">3016</td><td align="left" valign="top">555</td></tr><tr><td align="left" valign="top"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>LOC<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup></td><td align="left" valign="top">1801</td><td align="left" valign="top">715</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>TEL<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></td><td align="left" valign="top">650</td><td align="left" valign="top">41</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>EMAIL</td><td align="left" valign="top">13</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>DATE</td><td align="left" valign="top">2404</td><td align="left" valign="top">607</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>OTHER</td><td align="left" valign="top">33</td><td align="left" valign="top">1</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Total number of PII</td><td align="left" valign="top">7917</td><td align="left" valign="top">1919</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>PII: personal identifying information.</p></fn><fn id="table2fn2"><p><sup>b</sup>This corresponds to the absence of ad-hoc selected medical notes.</p></fn><fn id="table2fn3"><p><sup>c</sup>LOC: location.</p></fn><fn id="table2fn4"><p><sup>d</sup>TEL: telephone.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-2"><title>Performance Using PII-Based Metrics</title><p><xref ref-type="fig" rid="figure2">Figure 2</xref> plots the change in the <italic>F</italic><sub>1</sub>-score over the 15 epochs of fine-tuning for the 3 respective models. 
The Mistral 7B model quickly reaches a performance plateau, where its <italic>F</italic><sub>1</sub>-score stabilizes, whereas the Mixtral 8&#x00D7;7B and Llama 2 7B models exhibit a slower rate of improvement, with both reaching a plateau in their <italic>F</italic><sub>1</sub>-scores around the 12th epoch.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Plot of <italic>F</italic><sub>1</sub>-score by epoch: PII as statistical unit.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v4i1e57828_fig02.png"/></fig></sec><sec id="s3-3"><title>Recall Analysis</title><p>The recall estimates of the 3 models are shown in <xref ref-type="fig" rid="figure3">Figures 3</xref> and <xref ref-type="fig" rid="figure4">4</xref>.</p><p>Mistral 7B and Mixtral 8&#x00D7;7B achieved better overall recall. The Mistral 7B and Mixtral 8&#x00D7;7B models demonstrated marked enhancements in their deidentification efficacy across epochs, starting from the third epoch onward. Notably, the Mistral 7B model has shown a rapid improvement in performance, achieving a performance plateau by the sixth epoch. Conversely, the Mixtral 8&#x00D7;7B model&#x2019;s improvement trajectory was more gradual, reaching a stable performance level by the 13th epoch. The overall success rate appears not to improve beyond epoch 7 for the Mistral 7B model. Consequently, in the subsequent analysis, this epoch was selected for comparing success rates across categories.</p><p>As shown in <xref ref-type="fig" rid="figure5">Figure 5</xref>, Mistral 7B consistently outperformed Mixtral 8&#x00D7;7B and Llama 2 across all data identification categories. Despite Mixtral&#x2019;s performance improving over time, it still did not surpass Mistral 7B. 
Using Mistral 7B, a 100% (41/41) recall was observed for phone numbers (<xref ref-type="fig" rid="figure5">Figure 5</xref>) and recall was lower for locations than for names.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Plot of recall by epoch: clinical notes as statistical unit.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v4i1e57828_fig03.png"/></fig><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Plot of recall by epoch: PII as statistical unit. PII: personal identifying information.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v4i1e57828_fig04.png"/></fig><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>Plot of recall by epoch for PII: (A) Location, (B) Telephone, (C) Name, (D) Date. PII: personal identifying information.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v4i1e57828_fig05.png"/></fig></sec><sec id="s3-4"><title>BLEU Score</title><p>BLEU-4 scores were calculated to assess whether the models modified the texts at the note level. During the deidentification process, medical texts remained almost unchanged as demonstrated by a consistently high BLEU-4 score (<xref ref-type="fig" rid="figure6">Figure 6</xref>) beyond epoch 5.</p><fig position="float" id="figure6"><label>Figure 6.</label><caption><p>Plot of BLEU score by epoch: clinical note as statistical unit. 
BLEU: bilingual evaluation understudy.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v4i1e57828_fig06.png"/></fig></sec><sec id="s3-5"><title>Results Summary at Epoch 7</title><p><xref ref-type="table" rid="table3">Table 3</xref> presents a summary of performance metrics achieved by our models at epoch 7.</p><p>The results demonstrate that the Mistral 7B model outperforms both the Mixtral 8&#x00D7;7B and Llama 2 7B with an <italic>F</italic><sub>1</sub>-score of 0.9673. When using clinical note as the statistical unit, the recall is also much higher (0.9326) for Mistral 7B than Llama 2 and Mixtral 8&#x00D7;7B models.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Fine-tuned models performance at epoch 7.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Model</td><td align="left" valign="bottom">Clinical notes</td><td align="left" valign="bottom" colspan="3">Personal identifying information</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Recall</td><td align="left" valign="top">Precision</td><td align="left" valign="top">Recall</td><td align="left" valign="top"><italic>F</italic><sub>1</sub>-score</td></tr></thead><tbody><tr><td align="left" valign="top">Mistral 7B</td><td align="left" valign="top">0.9326</td><td align="left" valign="top">0.9721</td><td align="left" valign="top">0.9625</td><td align="left" valign="top">0.9673</td></tr><tr><td align="left" valign="top">Llama 2 7B</td><td align="left" valign="top">0.6888</td><td align="left" valign="top">0.9596</td><td align="left" valign="top">0.8041</td><td align="left" valign="top">0.875</td></tr><tr><td align="left" valign="top">Mixtral 8&#x00D7;7B</td><td align="left" valign="top">0.6417</td><td align="left" valign="top">0.9852</td><td align="left" valign="top">0.7655</td><td align="left" 
valign="top">0.8616</td></tr></tbody></table></table-wrap></sec><sec id="s3-6"><title>Error Analysis</title><p>In epoch 7 of the Mistral 7B model, a total of 63 clinical notes were not properly pseudonymized, as detailed in <xref ref-type="table" rid="table4">Table 4</xref>. Among these, location (LOC) errors were the most frequent, with 44 instances. Deleting geographical and institutional identifiers thus remains a significant challenge (with a recall of 86.1%). Specifically, 31 notes still included names of health or social service facilities, while 12 notes still included names of cities. Conversely, errors involving names (NAME) were significantly fewer, with only 4 instances, including 1 patient name and 3 doctors&#x2019; names, resulting in a high recall of 99.8% for this category. Date-related errors (DATE) were observed in 14 notes (with a recall of 97.8%).</p><p>The test dataset, comprising 3000 clinical notes, underwent a post hoc examination to identify any inaccuracies resulting from manual annotations that would have been detected by all 15 versions of our 3 fine-tuned models, spanning epochs 1 to 15. 
Through this process, we were able to pinpoint 65 notes, among the medical histories that were categorized as anonymous (ie, without identifying data, 2066 clinical notes), in which the model detected personally identifying information that had been overlooked by human annotators.</p><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Summary of deidentification errors at epoch 7.</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Errors</td><td align="left" valign="bottom">Count</td></tr></thead><tbody><tr><td align="left" valign="top">Total</td><td align="left" valign="top">63</td></tr><tr><td align="left" valign="top">Returned ANONYMOUS</td><td align="left" valign="top">29</td></tr><tr><td align="left" valign="top">Annotation error</td><td align="left" valign="top">34</td></tr><tr><td align="left" valign="top" colspan="2"><bold>Errors in personal identifying information categories</bold></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>NAME</td><td align="left" valign="top">4</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>LOC<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">44</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>DATE</td><td align="left" valign="top">14</td></tr><tr><td align="left" valign="top"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>OTHER</td><td align="left" valign="top">1</td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>LOC: location.</p></fn></table-wrap-foot></table-wrap><p>We observed that the models outperformed human annotation in 9 clinical records from the test set. Specifically, in these 9 records, 5 locations (LOC), 3 names (NAME), and 1 date (DATE) were omitted during manual annotation. The remaining 53 records present annotation errors from the models. Therefore, the total number of actual personally identifiable information (PII) amounts to 1928, rather than the 1919 initially identified by our experts.</p><p>Subsequently, corrections were made to the test dataset based on these findings, and main outcomes were recomputed in an additional sensitivity analysis. The metric measurements after accounting for these modifications are only slightly altered from the original results (see <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref> for the details).</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>In this study, we assessed the performance of 3 generative NLP models in the deidentification of clinical text documents. The generative model Mistral 7B demonstrated the highest performance with an overall <italic>F</italic><sub>1</sub>-score of 0.9673. At the clinical notes level, the same model achieved an overall recall of 0.9326, with this rate increasing to 0.9915 for the deletion of names. The evaluation was based on a test dataset of 3000 clinical notes, among which only 4 notes failed to be fully deidentified for names; in one case, the identifying name was that of a patient. As the method relies on the use of generative models, we also measured potential text alterations generated by the process. 
Beyond the fifth epoch, the BLEU score consistently exceeded 0.9864.</p></sec><sec id="s4-2"><title>Strengths</title><p>Our work distinguishes itself from the existing scientific literature by using a method that does not rely on NER and uses moderate-sized models. Instead, the use of generative large language models allows for the production of text that is pseudonymized by removing PII components. This is the reason why we added metrics that use clinical notes as the statistical unit. This led us to use the BLEU metric to assess potential text alterations. Another consequence of this method is that no hyperparameters are set which made it possible to avoid the use of separate test and validation dataset partitions. The size of our training and test samples, independently annotated by 2 experts, constitutes a significant strength in our study. To our knowledge, no other study has used a test sample of such size (3000 notes). Yet, it is crucial to have the means to detect rare errors if the ultimate goal is to develop a system that guarantees the pseudonymization of clinical texts. We deliberately limited our model selection to those whose implementation does not require powerful servers and can be executed on personal computers equipped with a consumer-grade graphics card. The largest model is Mixtral 8&#x00D7;7B, which has approximately 8 times more parameters than the other 2 models. Mixtral 8&#x00D7;7B shares the same architecture as Mistral 7B, with the distinction that each layer consists of 8 feed-forward blocks. Although training it requires significant memory capacity, this is not the case during the inference phase, during which only 2 of the feed-forward blocks are used, selected by a network acting as a router.</p></sec><sec id="s4-3"><title>Limitations</title><sec id="s4-3-1"><title>Annotation Process Inaccuracies</title><sec id="s4-3-1-1"><title>Overview</title><p>During the annotation process, we observed some inaccuracies. 
To assess the impact of these inaccuracies on our metrics, we conducted a post hoc analysis, taking into account corrections made by the model. Although this analysis revealed few variations, it is important to note that some errors may still remain in the test set, undetected by the model. These undetected errors could potentially affect the overall performance of the model.</p></sec><sec id="s4-3-1-2"><title>Model Choice</title><p>We opted for a fine-tuned large language model&#x2013;based approach over a dedicated NER model due to pragmatic considerations. Our hypothesis was that a targeted human annotation process, with expert annotators pinpointing PII within texts, would be more effective than a broad NER annotation effort, given the same time investment. Focusing on essential PII elements helps us minimize the ambiguities that broader NER annotations often entail. This focus leads to improved precision and recall rates during the training phase. Furthermore, this approach is in line with the Automated Processing of Emergency Department Visit Summaries for a National Observatory project&#x2019;s objectives, which prioritize the accurate removal of PII from unstructured medical texts.</p><p>The default choice for identification tasks is usually a bidirectional transformer, starting from the hypothesis that the relationship of a word with its context before and after that word allows for better comprehension of the role of those words and therefore should be more suited for NER tasks. However, this hypothesis no longer holds when dealing with generative models. Since the goal here is to generate redacted text, the provided prompt has access to the entire corrected phrase. 
Consequently, relative to a given word, implications cannot be considered unidirectional.</p></sec></sec></sec><sec id="s4-4"><title>Model Sharing Constraints</title><sec id="s4-4-1"><title>Overview</title><p>Another significant limitation is that our model was fine-tuned using nonanonymous clinical texts, which prevents us from sharing the model&#x2019;s weights with the community. Sharing the model&#x2019;s weights could potentially allow for the extraction of the original training data. This limitation restricts the model&#x2019;s reproducibility and its broader applicability across different research settings and medical domains.</p></sec><sec id="s4-4-2"><title>Demographic and Textual Bias</title><p>The processed data are in free-text format, written by health care staff, which introduces significant variability. This variability is not only present between different services within the same health facility but also across various centers. Factors such as the content of clinical notes, the medical abbreviations used, writing styles, and the level of detail in documentation can differ greatly from one source to another. Such differences could potentially impact the performance of our models, making it essential to test and adapt our approach to data from diverse sources.</p></sec></sec><sec id="s4-5"><title>Comparison With Prior Work</title><p>Comparing the performance of our models with those documented in the literature presents challenges because our models are specifically fine-tuned to pseudonymize French-language clinical notes. 
Consequently, it is not feasible to apply them to the English-language databases traditionally used for benchmarking, such as i2b2 (i2b2 TranSMART Foundation) [<xref ref-type="bibr" rid="ref34">34</xref>], MIMIC II (PhysioNet) [<xref ref-type="bibr" rid="ref35">35</xref>], and MIMIC III (PhysioNet) [<xref ref-type="bibr" rid="ref36">36</xref>].</p><p>In addition to these differences in benchmarking context, there are also divergences in the methodologies used for deidentification. Historically, deidentification of medical records has evolved from rule-based systems, which rely on predefined rules, regular expressions, and dictionaries, to more sophisticated machine learning approaches. Rule-based methods, while easy to implement and interpret, often fall short in handling the variability and unpredictability inherent in unstructured clinical texts. On the other hand, machine learning-based approaches offer more flexibility and adaptability, particularly when dealing with large and diverse datasets. These models can learn patterns directly from the data, making them more effective in identifying PIIs that deviate from standard formats. However, their effectiveness is heavily dependent on the quality and quantity of annotated data available for training. Moreover, machine learning models typically require significant computational resources and expertise in model tuning, which can be a barrier to adoption, particularly in resource-constrained settings.</p><p>Our proposed model leverages these advanced machine learning techniques, specifically fine-tuned for the French language. 
This focus allows our model to effectively capture and manage the linguistic intricacies specific to French clinical notes, such as frequent abbreviations and unstructured text entries, which are common in emergency department settings.</p><p>Additionally, our results demonstrate that while our model performs comparably to those trained on English-language corpora, certain challenges persist, particularly in the detection of location-based PIIs. This is likely due to the complexity introduced by variations in PII forms, such as acronyms and abbreviations, as well as the presence of typing errors, which are less predictable and harder to model.</p><p>Therefore, to compare performance metrics accurately, it is necessary to assess the complexity of clinical texts from these databases against those used in our study. In the <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, we include examples of clinical notes from our dataset to demonstrate that PIIs can appear randomly within the text, in an unstructured manner, and that these PIIs, along with the rest of the text, often include numerous abbreviations. This tendency toward abbreviation is explained by the unique demands of emergency department settings, where nurses are required to perform efficient, real-time data entry into the hospital&#x2019;s information system. 
As a result, our dataset more closely aligns with MIMIC II, which features unstructured clinical notes made by nurses, as opposed to i2b2, where each type of information is distinctly separated, preventing the amalgamation of multiple PIIs within single sentences.</p><p>As shown in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref> [<xref ref-type="bibr" rid="ref37">37</xref>-<xref ref-type="bibr" rid="ref43">43</xref>], our results (overall <italic>F</italic><sub>1</sub>-score of 0.9673) are on par with previous studies on English clinical text corpus that used an algorithm including models using self-attention [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref44">44</xref>]. The <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref> [<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref43">43</xref>] summarizes study results that examined recall variations according to PII categories. These figures consistently show that the relative weakness of these algorithms, ours included, lies in a small number of errors concerning locations. Our dataset presents additional challenges for PII identification due to the presence of multiple variations of PII, including acronyms, abbreviations, and typing errors. Specifically, of the 44 notes with failed identification, 15 involved abbreviations or acronyms, and 2 contained typing errors.</p></sec><sec id="s4-6"><title>Future Work</title><p>We aim to enhance the detection capabilities of PII in our medical notes by fine-tuning our model with newly annotated data. To achieve this, we plan to generate artificial clinical notes using commercially available application programming interfaces, such as GPT-4. 
These large language models, much more powerful than ours, can produce realistic notes containing PII and annotations, which will facilitate the training process and increase data diversity.</p><p>By generating a substantial volume of these artificial data, we can ensure equitable representation of different PII categories and evaluate 2 key aspects: identifying the optimal amount of clinical notes needed to achieve the highest possible accuracy and recall, and comparing the effectiveness of models fine-tuned with real data versus those fine-tuned with artificially generated data.</p><p>Using this newly developed model based on artificial data, we aim to make it available as an open-source resource, benefiting the broader community. Additionally, this foundation will enable us to create a multilingual model capable of processing both English and French clinical notes. This multilingual model will allow us to perform performance comparisons against literature benchmark datasets such as i2b2 and MIMIC. The performance of these refined models will be evaluated using our corrected test set, along with newly annotated data from various emergency services.</p><p>This study is currently focused on data from an emergency department in France. In the subsequent phases, our goal is to extend this methodology to other services across France, with the ambition of creating a national French observatory on trauma. However, it is important to consider the potential for demographic biases in our model&#x2019;s performance.</p><p>By diversifying data sources, we aim to enhance the model&#x2019;s generalizability. 
If biases are identified in this process, we plan to retrain the model, either by using a specific portion of data from each service or by integrating synthetic data to mitigate these biases.</p><p>We intend to extend our methodology to other types of sensitive documents, such as medico-legal records, to evaluate the generalizability and effectiveness of our approach in protecting personal information across various domains.</p><p>We are also considering integrating explainability methods, similar to those used by Arnaud et al [<xref ref-type="bibr" rid="ref45">45</xref>], to enhance the transparency of our model in PII detection. These techniques, based on transformer models and interpretability approaches such as LIME [<xref ref-type="bibr" rid="ref46">46</xref>], which have already proven effective on triage note data similar to ours, could strengthen user trust and facilitate the adoption of our technologies in clinical settings.</p><p>Through this comprehensive approach, we aim to enhance the value and applicability of our models, contributing to the development of privacy-preserving technologies in the health care domain and strengthening the security of patients&#x2019; sensitive information.</p></sec><sec id="s4-7"><title>Ethical Considerations and Practical Implementations</title><p>The use of small to moderate-sized models is a key consideration in our approach. These models are generally capable of running on GPUs with at least 16 GB of VRAM, making them suitable for use on personal computers or within local infrastructures. This is particularly advantageous for institutions with limited resources, as it allows them to manage data privately and securely without relying on extensive external infrastructure. 
However, while local deployment ensures better control over sensitive data, it can also be time-consuming and may introduce challenges related to the interoperability of different systems.</p><p>One of the main challenges of this pipeline is its implementation across all participating emergency services, given that not all institutions may be equipped to efficiently manage these new procedures. The rationale behind implementing this process is rooted in a data-sharing initiative aimed at establishing a national observatory, which necessitates enhanced protection for the information being used.</p><p>At this stage, centralizing the data in a dedicated center with the necessary computational resources remains the simplest solution. This would allow for secure, controlled, and efficient management of patient data. Alternatively, the process could be implemented directly within health data warehouses, enabling these facilities to store and apply the deidentification process locally. Regardless of the approach, it is imperative that the use of this pipeline on health data is conducted within a legally and digitally controlled framework, authorized by the relevant authorities.</p><p>Given the potential risks of data reidentification, especially when dealing with unique clinical histories, we emphasize that pseudonymization alone is insufficient and should be accompanied by additional protection and security measures to prevent unauthorized access to sensitive data.</p></sec><sec id="s4-8"><title>Conclusion</title><p>Our research underscores the significant capabilities of generative NLP models, with Mistral 7B standing out for its superior ability to deidentify clinical texts efficiently. Achieving notable performance metrics, Mistral 7B operates effectively without requiring high-end computational resources. 
These methods pave the way for a broader availability of pseudonymized clinical texts, enabling their use for research purposes and the optimization of the health care system.</p></sec></sec></body><back><ack><p>This study was conducted under the Automated Processing of Emergency Department Visit Summaries for a National Observatory (TARPON) project by the Bordeaux Population Health-Assessing Health in a Digitalizing Real-World team and the Bordeaux University Hospital&#x2019;s emergency department. We thank the labeling team and the University Hospital of Bordeaux for their logistical support and data access.</p></ack><notes><sec><title>Data Availability</title><p>The datasets generated or analyzed during this study are not publicly available due to the confidential nature of the patient data used.</p></sec></notes><fn-group><fn fn-type="con"><p>EL, CG-J, and MA-F were responsible for the conceptualization and design. BC, OD, EL, DR, and CG-J worked on the annotation. OD, CG-J, and EL analyzed and interpreted the data. OD, EL, and AG-A drafted this paper. All authors handled the critical revision. CG-J provided this study&#x2019;s material. 
EL supervised the study.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">BLEU</term><def><p>bilingual evaluation understudy</p></def></def-item><def-item><term id="abb2">NER</term><def><p>named entity recognition</p></def></def-item><def-item><term id="abb3">NLP</term><def><p>natural language processing</p></def></def-item><def-item><term id="abb4">PII</term><def><p>personal identifying information</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Menachemi</surname><given-names>N</given-names> </name><name name-style="western"><surname>Collum</surname><given-names>TH</given-names> </name></person-group><article-title>Benefits and drawbacks of electronic health record systems</article-title><source>Risk Manag Healthc Policy</source><year>2011</year><volume>4</volume><fpage>47</fpage><lpage>55</lpage><pub-id pub-id-type="doi">10.2147/RMHP.S12985</pub-id><pub-id pub-id-type="medline">22312227</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="web"><article-title>Regulation (EU) 2016/679 of the European Parliament and of the Council of 27 April 2016 on the protection of natural persons with regard to the processing of personal data and on the free movement of such data, and repealing Directive 95/46/EC (General Data Protection Regulation)</article-title><source>European Parliament and Council</source><year>2016</year><access-date>2025-03-31</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://dvbi.ru/Portals/0/DOCUMENTS_SHARE/RISK_MANAGEMENT/EBA/GDPR_eng_rus.pdf">https://dvbi.ru/Portals/0/DOCUMENTS_SHARE/RISK_MANAGEMENT/EBA/GDPR_eng_rus.pdf</ext-link></comment></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation 
citation-type="web"><article-title>mHealth: new horizons for health through mobile technologies: second global survey on eHealth</article-title><source>World Health Organization</source><year>2012</year><access-date>2025-03-31</access-date><publisher-name>World Health Organization</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://iris.who.int/bitstream/handle/10665/44607/9789241564250_eng.pdf?sequence=1&#x0026;isAllowed=y">https://iris.who.int/bitstream/handle/10665/44607/9789241564250_eng.pdf?sequence=1&#x0026;isAllowed=y</ext-link></comment></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>El Emam</surname><given-names>K</given-names> </name></person-group><article-title>Methods for the de-identification of electronic health records for genomic research</article-title><source>Genome Med</source><year>2011</year><month>04</month><day>27</day><volume>3</volume><issue>4</issue><fpage>25</fpage><pub-id pub-id-type="doi">10.1186/gm239</pub-id><pub-id pub-id-type="medline">21542889</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chenais</surname><given-names>G</given-names> </name><name name-style="western"><surname>Gil-Jardin&#x00E9;</surname><given-names>C</given-names> </name><name name-style="western"><surname>Touchais</surname><given-names>H</given-names> </name><etal/></person-group><article-title>Deep learning transformer models for building a comprehensive and real-time trauma observatory: development and validation study</article-title><source>JMIR AI</source><year>2023</year><month>01</month><day>12</day><volume>2</volume><fpage>e40843</fpage><pub-id pub-id-type="doi">10.2196/40843</pub-id><pub-id pub-id-type="medline">38875539</pub-id></nlm-citation></ref><ref 
id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Meystre</surname><given-names>SM</given-names> </name><name name-style="western"><surname>Friedlin</surname><given-names>FJ</given-names> </name><name name-style="western"><surname>South</surname><given-names>BR</given-names> </name><name name-style="western"><surname>Shen</surname><given-names>S</given-names> </name><name name-style="western"><surname>Samore</surname><given-names>MH</given-names> </name></person-group><article-title>Automatic de-identification of textual documents in the electronic health record: a review of recent research</article-title><source>BMC Med Res Methodol</source><year>2010</year><month>08</month><day>2</day><volume>10</volume><fpage>70</fpage><pub-id pub-id-type="doi">10.1186/1471-2288-10-70</pub-id><pub-id pub-id-type="medline">20678228</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Negash</surname><given-names>B</given-names> </name><name name-style="western"><surname>Katz</surname><given-names>A</given-names> </name><name name-style="western"><surname>Neilson</surname><given-names>CJ</given-names> </name><etal/></person-group><article-title>De-identification of free text data containing personal health information: a scoping review of reviews</article-title><source>Int J Popul Data Sci</source><year>2023</year><volume>8</volume><issue>1</issue><fpage>2153</fpage><pub-id pub-id-type="doi">10.23889/ijpds.v8i1.2153</pub-id><pub-id pub-id-type="medline">38414537</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Beckwith</surname><given-names>BA</given-names> </name><name name-style="western"><surname>Mahaadevan</surname><given-names>R</given-names> 
</name><name name-style="western"><surname>Balis</surname><given-names>UJ</given-names> </name><name name-style="western"><surname>Kuo</surname><given-names>F</given-names> </name></person-group><article-title>Development and evaluation of an open source software tool for deidentification of pathology reports</article-title><source>BMC Med Inform Decis Mak</source><year>2006</year><month>03</month><day>6</day><volume>6</volume><fpage>12</fpage><pub-id pub-id-type="doi">10.1186/1472-6947-6-12</pub-id><pub-id pub-id-type="medline">16515714</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Berman</surname><given-names>JJ</given-names> </name></person-group><article-title>Concept-match medical data scrubbing</article-title><source>Arch Pathol Lab Med</source><year>2003</year><month>06</month><day>1</day><volume>127</volume><issue>6</issue><fpage>680</fpage><lpage>686</lpage><pub-id pub-id-type="doi">10.5858/2003-127-680-CMDS</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Friedlin</surname><given-names>FJ</given-names> </name><name name-style="western"><surname>McDonald</surname><given-names>CJ</given-names> </name></person-group><article-title>A software tool for removing patient identifying information from clinical documents</article-title><source>J Am Med Inform Assoc</source><year>2008</year><volume>15</volume><issue>5</issue><fpage>601</fpage><lpage>610</lpage><pub-id pub-id-type="doi">10.1197/jamia.M2702</pub-id><pub-id pub-id-type="medline">18579831</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gupta</surname><given-names>D</given-names> </name><name 
name-style="western"><surname>Saul</surname><given-names>M</given-names> </name><name name-style="western"><surname>Gilbertson</surname><given-names>J</given-names> </name></person-group><article-title>Evaluation of a deidentification (De-Id) software engine to share pathology reports and clinical documents for research</article-title><source>Am J Clin Pathol</source><year>2004</year><month>02</month><volume>121</volume><issue>2</issue><fpage>176</fpage><lpage>186</lpage><pub-id pub-id-type="doi">10.1309/E6K3-3GBP-E5C2-7FYU</pub-id><pub-id pub-id-type="medline">14983930</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Morrison</surname><given-names>FP</given-names> </name><name name-style="western"><surname>Li</surname><given-names>L</given-names> </name><name name-style="western"><surname>Lai</surname><given-names>AM</given-names> </name><name name-style="western"><surname>Hripcsak</surname><given-names>G</given-names> </name></person-group><article-title>Repurposing the clinical record: can an existing natural language processing system de-identify clinical notes?</article-title><source>J Am Med Inform Assoc</source><year>2009</year><volume>16</volume><issue>1</issue><fpage>37</fpage><lpage>39</lpage><pub-id pub-id-type="doi">10.1197/jamia.M2862</pub-id><pub-id pub-id-type="medline">18952938</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Neamatullah</surname><given-names>I</given-names> </name><name name-style="western"><surname>Douglass</surname><given-names>MM</given-names> </name><name name-style="western"><surname>Lehman</surname><given-names>LwH</given-names> </name><etal/></person-group><article-title>Automated de-identification of free-text medical records</article-title><source>BMC Med Inform Decis 
Mak</source><year>2008</year><month>07</month><day>24</day><volume>8</volume><fpage>32</fpage><pub-id pub-id-type="doi">10.1186/1472-6947-8-32</pub-id><pub-id pub-id-type="medline">18652655</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ruch</surname><given-names>P</given-names> </name><name name-style="western"><surname>Baud</surname><given-names>RH</given-names> </name><name name-style="western"><surname>Rassinoux</surname><given-names>AM</given-names> </name><name name-style="western"><surname>Bouillon</surname><given-names>P</given-names> </name><name name-style="western"><surname>Robert</surname><given-names>G</given-names> </name></person-group><article-title>Medical document anonymization with a semantic lexicon</article-title><source>Proc AMIA Symp</source><year>2000</year><fpage>729</fpage><lpage>733</lpage><pub-id pub-id-type="medline">11079980</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sweeney</surname><given-names>L</given-names> </name></person-group><article-title>Replacing personally-identifying information in medical records, the Scrub system</article-title><source>Proc AMIA Annu Fall Symp</source><year>1996</year><fpage>333</fpage><lpage>337</lpage><pub-id pub-id-type="medline">8947683</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Thomas</surname><given-names>SM</given-names> </name><name name-style="western"><surname>Mamlin</surname><given-names>B</given-names> </name><name name-style="western"><surname>Schadow</surname><given-names>G</given-names> </name><name name-style="western"><surname>McDonald</surname><given-names>C</given-names> 
</name></person-group><article-title>A successful technique for removing names in pathology reports using an augmented search and replace method</article-title><source>Proc AMIA Symp</source><year>2002</year><fpage>777</fpage><lpage>781</lpage><pub-id pub-id-type="medline">12463930</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ahmed</surname><given-names>T</given-names> </name><name name-style="western"><surname>Aziz</surname><given-names>MMA</given-names> </name><name name-style="western"><surname>Mohammed</surname><given-names>N</given-names> </name></person-group><article-title>De-identification of electronic health record using neural network</article-title><source>Sci Rep</source><year>2020</year><month>10</month><day>29</day><volume>10</volume><issue>1</issue><fpage>18600</fpage><pub-id pub-id-type="doi">10.1038/s41598-020-75544-1</pub-id><pub-id pub-id-type="medline">33122735</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Guo</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Gaizauskas</surname><given-names>RJ</given-names> </name><name name-style="western"><surname>Roberts</surname><given-names>I</given-names> </name><name name-style="western"><surname>Demetriou</surname><given-names>G</given-names> </name><name name-style="western"><surname>Hepple</surname><given-names>M</given-names> </name></person-group><article-title>Identifying personal health information using support vector machines</article-title><source>Semantic Scholar</source><year>2006</year><access-date>2025-03-31</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://api.semanticscholar.org/CorpusID:16833759">https://api.semanticscholar.org/CorpusID:16833759</ext-link></comment></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dernoncourt</surname><given-names>F</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>JY</given-names> </name><name name-style="western"><surname>Uzuner</surname><given-names>O</given-names> </name><name name-style="western"><surname>Szolovits</surname><given-names>P</given-names> </name></person-group><article-title>De-identification of patient notes with recurrent neural networks</article-title><source>J Am Med Inform Assoc</source><year>2017</year><month>05</month><day>1</day><volume>24</volume><issue>3</issue><fpage>596</fpage><lpage>606</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocw156</pub-id><pub-id pub-id-type="medline">28040687</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Vaswani</surname><given-names>A</given-names> </name><name name-style="western"><surname>Shazeer</surname><given-names>N</given-names> </name><name name-style="western"><surname>Parmar</surname><given-names>N</given-names> </name><name name-style="western"><surname>Uszkoreit</surname><given-names>J</given-names> </name><name name-style="western"><surname>Jones</surname><given-names>L</given-names> </name><name name-style="western"><surname>Gomez</surname><given-names>AN</given-names> </name><etal/></person-group><article-title>Attention is all you need</article-title><source>NeurIPS Proceedings</source><year>2017</year><access-date>2025-03-31</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://proceedings.neurips.cc/paper_files/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf">https://proceedings.neurips.cc/paper_files/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf</ext-link></comment></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Devlin</surname><given-names>J</given-names> </name><name name-style="western"><surname>Chang</surname><given-names>MW</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>K</given-names> </name><name name-style="western"><surname>Toutanova</surname><given-names>K</given-names> </name></person-group><article-title>BERT: pre-training of deep bidirectional transformers for language understanding</article-title><source>Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)</source><year>2019</year><fpage>4171</fpage><lpage>4186</lpage><pub-id pub-id-type="doi">10.18653/v1/N19-1423</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cheligeer</surname><given-names>C</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>G</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>S</given-names> </name><etal/></person-group><article-title>BERT-based neural network for inpatient fall detection from electronic medical records: retrospective cohort study</article-title><source>JMIR Med Inform</source><year>2024</year><month>01</month><day>30</day><volume>12</volume><fpage>e48995</fpage><pub-id pub-id-type="doi">10.2196/48995</pub-id><pub-id pub-id-type="medline">38289643</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation 
citation-type="preprint"><person-group person-group-type="author"><collab>OpenAI</collab><name name-style="western"><surname>Achiam</surname><given-names>J</given-names> </name><name name-style="western"><surname>Adler</surname><given-names>S</given-names> </name><name name-style="western"><surname>Agarwal</surname><given-names>S</given-names> </name><name name-style="western"><surname>Ahmad</surname><given-names>L</given-names> </name><etal/></person-group><article-title>GPT-4 technical report</article-title><source>arXiv</source><comment>Preprint posted online on  Mar 4, 2024</comment><pub-id pub-id-type="doi">10.48550/arXiv.2303.08774</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Bai</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Kadavath</surname><given-names>S</given-names> </name><name name-style="western"><surname>Kundu</surname><given-names>S</given-names> </name><name name-style="western"><surname>Askell</surname><given-names>A</given-names> </name><name name-style="western"><surname>Kernion</surname><given-names>J</given-names> </name><name name-style="western"><surname>Jones</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Constitutional AI: harmlessness from AI feedback</article-title><source>AI-Plans</source><year>2022</year><access-date>2025-03-31</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://ai-plans.com/file_storage/4f32fa39-3a01-46c7-878e-c92b7aa7165f_2212.08073v1.pdf">https://ai-plans.com/file_storage/4f32fa39-3a01-46c7-878e-c92b7aa7165f_2212.08073v1.pdf</ext-link></comment></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>J</given-names> </name><name 
name-style="western"><surname>Gupta</surname><given-names>S</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>A</given-names> </name><etal/></person-group><article-title>OpenDeID pipeline for unstructured electronic health record text notes based on rules and transformers: deidentification algorithm development and validation study</article-title><source>J Med Internet Res</source><year>2023</year><month>12</month><day>6</day><volume>25</volume><fpage>e48145</fpage><pub-id pub-id-type="doi">10.2196/48145</pub-id><pub-id pub-id-type="medline">38055317</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="web"><article-title>Health insurance portability and accountability act of 1996 (HIPAA)</article-title><source>Centers for Disease Control and Prevention, Public Health Law</source><year>2024</year><access-date>2025-03-31</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cdc.gov/phlp/php/resources/health-insurance-portability-and-accountability-act-of-1996-hipaa.html?CDC_AAref_Val=https://www.cdc.gov/phlp/publications/topic/hipaa.html">https://www.cdc.gov/phlp/php/resources/health-insurance-portability-and-accountability-act-of-1996-hipaa.html?CDC_AAref_Val=https://www.cdc.gov/phlp/publications/topic/hipaa.html</ext-link></comment></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Huang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Yu</surname><given-names>X</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>L</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Cao</surname><given-names>C</given-names> 
</name><etal/></person-group><article-title>DeID-GPT: zero-shot medical text de-identification by GPT-4</article-title><source>arXiv</source><comment>Preprint posted online on  Dec 21, 2023</comment><pub-id pub-id-type="doi">10.48550/arXiv.2303.11032</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Touvron</surname><given-names>H</given-names> </name><name name-style="western"><surname>Martin</surname><given-names>L</given-names> </name><name name-style="western"><surname>Stone</surname><given-names>K</given-names> </name><name name-style="western"><surname>Albert</surname><given-names>P</given-names> </name><name name-style="western"><surname>Almahairi</surname><given-names>A</given-names> </name><name name-style="western"><surname>Babaei</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Llama 2: open foundation and fine-tuned chat models</article-title><source>arXiv</source><comment>Preprint posted online on  Jul 19, 2023</comment><pub-id pub-id-type="doi">10.48550/arXiv.2307.09288</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Jiang</surname><given-names>AQ</given-names> </name><name name-style="western"><surname>Sablayrolles</surname><given-names>A</given-names> </name><name name-style="western"><surname>Mensch</surname><given-names>A</given-names> </name><name name-style="western"><surname>Bamford</surname><given-names>C</given-names> </name><name name-style="western"><surname>Chaplot</surname><given-names>DS</given-names> </name><name name-style="western"><surname>Casas</surname><given-names>D</given-names> </name><etal/></person-group><article-title>Mistral 7B</article-title><source>arXiv</source><comment>Preprint posted online on  Oct 10, 2023</comment><pub-id 
pub-id-type="doi">10.48550/arXiv.2310.06825</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Jiang</surname><given-names>AQ</given-names> </name><name name-style="western"><surname>Sablayrolles</surname><given-names>A</given-names> </name><name name-style="western"><surname>Roux</surname><given-names>A</given-names> </name><name name-style="western"><surname>Mensch</surname><given-names>A</given-names> </name><name name-style="western"><surname>Savary</surname><given-names>B</given-names> </name><name name-style="western"><surname>Bamford</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Mixtral of experts</article-title><source>arXiv</source><comment>Preprint posted online on  Jan 8, 2024</comment><pub-id pub-id-type="doi">10.48550/arXiv.2401.04088</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Dettmers</surname><given-names>T</given-names> </name><name name-style="western"><surname>Pagnoni</surname><given-names>A</given-names> </name><name name-style="western"><surname>Holtzman</surname><given-names>A</given-names> </name><name name-style="western"><surname>Zettlemoyer</surname><given-names>L</given-names> </name></person-group><article-title>QLoRA: efficient finetuning of quantized LLMs. 
Advances in Neural Information Processing Systems</article-title><source>NeurIPS Proceedings</source><year>2023</year><access-date>2025-03-31</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://proceedings.neurips.cc/paper_files/paper/2023/file/1feb87871436031bdc0f2beaa62a049b-Paper-Conference.pdf">https://proceedings.neurips.cc/paper_files/paper/2023/file/1feb87871436031bdc0f2beaa62a049b-Paper-Conference.pdf</ext-link></comment></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Loshchilov</surname><given-names>I</given-names> </name><name name-style="western"><surname>Hutter</surname><given-names>F</given-names> </name></person-group><article-title>Decoupled weight decay regularization</article-title><source>arXiv</source><comment>Preprint posted online on  Jan 4, 2019</comment><pub-id pub-id-type="doi">10.48550/arXiv.1711.05101</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Papineni</surname><given-names>K</given-names> </name><name name-style="western"><surname>Roukos</surname><given-names>S</given-names> </name><name name-style="western"><surname>Ward</surname><given-names>T</given-names> </name><name name-style="western"><surname>Zhu</surname><given-names>WJ</given-names> </name></person-group><article-title>BLEU: a method for automatic evaluation of machine translation</article-title><source>Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics</source><publisher-name>Association for Computational Linguistics</publisher-name><fpage>311</fpage><lpage>318</lpage><pub-id pub-id-type="doi">10.3115/1073083.1073135</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="web"><source>Informatics for Integrating Biology &#x0026; the 
Bedside (i2b2)</source><access-date>2025-03-31</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.i2b2.org/">https://www.i2b2.org/</ext-link></comment></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Saeed</surname><given-names>M</given-names> </name><name name-style="western"><surname>Villarroel</surname><given-names>M</given-names> </name><name name-style="western"><surname>Reisner</surname><given-names>AT</given-names> </name><etal/></person-group><article-title>Multiparameter Intelligent Monitoring in Intensive Care II: a public-access intensive care unit database</article-title><source>Crit Care Med</source><year>2011</year><month>05</month><volume>39</volume><issue>5</issue><fpage>952</fpage><lpage>960</lpage><pub-id pub-id-type="doi">10.1097/CCM.0b013e31820a92c6</pub-id><pub-id pub-id-type="medline">21283005</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Johnson</surname><given-names>AEW</given-names> </name><name name-style="western"><surname>Pollard</surname><given-names>TJ</given-names> </name><name name-style="western"><surname>Shen</surname><given-names>L</given-names> </name><etal/></person-group><article-title>MIMIC-III, a freely accessible critical care database</article-title><source>Sci Data</source><year>2016</year><month>05</month><day>24</day><volume>3</volume><fpage>160035</fpage><pub-id pub-id-type="doi">10.1038/sdata.2016.35</pub-id><pub-id pub-id-type="medline">27219127</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>L</given-names> </name><name 
name-style="western"><surname>Perez-Concha</surname><given-names>O</given-names> </name><name name-style="western"><surname>Nguyen</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Web-based application based on human-in-the-loop deep learning for deidentifying free-text data in electronic medical records: development and usability study</article-title><source>Interact J Med Res</source><year>2023</year><month>08</month><day>25</day><volume>12</volume><fpage>e46322</fpage><pub-id pub-id-type="doi">10.2196/46322</pub-id><pub-id pub-id-type="medline">37624624</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Grouin</surname><given-names>C</given-names> </name><name name-style="western"><surname>Zweigenbaum</surname><given-names>P</given-names> </name></person-group><article-title>Automatic de-identification of French clinical records: comparison of rule-based and machine-learning approaches</article-title><source>Stud Health Technol Inform</source><year>2013</year><volume>192</volume><fpage>476</fpage><lpage>480</lpage><pub-id pub-id-type="medline">23920600</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chazard</surname><given-names>E</given-names> </name><name name-style="western"><surname>Mouret</surname><given-names>C</given-names> </name><name name-style="western"><surname>Ficheur</surname><given-names>G</given-names> </name><name name-style="western"><surname>Schaffar</surname><given-names>A</given-names> </name><name name-style="western"><surname>Beuscart</surname><given-names>JB</given-names> </name><name name-style="western"><surname>Beuscart</surname><given-names>R</given-names> </name></person-group><article-title>Proposal and evaluation of FASDIM, a Fast and Simple 
De-Identification Method for unstructured free-text clinical records</article-title><source>Int J Med Inform</source><year>2014</year><month>04</month><volume>83</volume><issue>4</issue><fpage>303</fpage><lpage>312</lpage><pub-id pub-id-type="doi">10.1016/j.ijmedinf.2013.11.005</pub-id><pub-id pub-id-type="medline">24370391</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Catelli</surname><given-names>R</given-names> </name><name name-style="western"><surname>Gargiulo</surname><given-names>F</given-names> </name><name name-style="western"><surname>Casola</surname><given-names>V</given-names> </name><name name-style="western"><surname>De Pietro</surname><given-names>G</given-names> </name><name name-style="western"><surname>Fujita</surname><given-names>H</given-names> </name><name name-style="western"><surname>Esposito</surname><given-names>M</given-names> </name></person-group><article-title>Crosslingual named entity recognition for clinical de-identification applied to a COVID-19 Italian data set</article-title><source>Appl Soft Comput</source><year>2020</year><month>12</month><volume>97</volume><fpage>106779</fpage><pub-id pub-id-type="doi">10.1016/j.asoc.2020.106779</pub-id><pub-id pub-id-type="medline">33052197</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Berg</surname><given-names>H</given-names> </name><name name-style="western"><surname>Henriksson</surname><given-names>A</given-names> </name><name name-style="western"><surname>Dalianis</surname><given-names>H</given-names> </name></person-group><article-title>The impact of de-identification on downstream named entity recognition in clinical text</article-title><source>Proceedings of the 11th International Workshop on Health Text Mining and Information 
Analysis</source><year>2020</year><publisher-name>Association for Computational Linguistics</publisher-name><fpage>1</fpage><lpage>11</lpage><pub-id pub-id-type="doi">10.18653/v1/2020.louhi-1.1</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Syed</surname><given-names>M</given-names> </name><name name-style="western"><surname>Sexton</surname><given-names>K</given-names> </name><name name-style="western"><surname>Greer</surname><given-names>M</given-names> </name><etal/></person-group><article-title>DeIDNER Model: A Neural Network Named Entity Recognition Model for Use in the De-identification of Clinical Notes</article-title><source>Biomed Eng Syst Technol Int Jt Conf BIOSTEC Revis Sel Pap</source><year>2022</year><month>02</month><volume>5</volume><fpage>640</fpage><lpage>647</lpage><pub-id pub-id-type="doi">10.5220/0010884500003123</pub-id><pub-id pub-id-type="medline">35386186</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Tchouka</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Couchot</surname><given-names>JF</given-names> </name><name name-style="western"><surname>Coulmeau</surname><given-names>M</given-names> </name><name name-style="western"><surname>Laiymani</surname><given-names>D</given-names> </name><name name-style="western"><surname>Rahmani</surname><given-names>A</given-names> </name></person-group><article-title>De-identification of French unstructured clinical notes for machine learning tasks</article-title><source>arXiv</source><comment>Preprint posted online on Oct 6, 2023</comment><pub-id pub-id-type="doi">10.48550/arXiv.2209.09631</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="preprint"><person-group 
person-group-type="author"><name name-style="western"><surname>Meaney</surname><given-names>C</given-names> </name><name name-style="western"><surname>Hakimpour</surname><given-names>W</given-names> </name><name name-style="western"><surname>Kalia</surname><given-names>S</given-names> </name><name name-style="western"><surname>Moineddin</surname><given-names>R</given-names> </name></person-group><article-title>A comparative evaluation of transformer models for de-identification of clinical text data</article-title><source>arXiv</source><comment>Preprint posted online on Mar 25, 2022</comment><pub-id pub-id-type="doi">10.48550/arXiv.2204.07056</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Arnaud</surname><given-names>E</given-names> </name><name name-style="western"><surname>Elbattah</surname><given-names>M</given-names> </name><name name-style="western"><surname>Moreno-S&#x00E1;nchez</surname><given-names>PA</given-names> </name><name name-style="western"><surname>Dequen</surname><given-names>G</given-names> </name><name name-style="western"><surname>Ghazali</surname><given-names>DA</given-names> </name></person-group><article-title>Explainable NLP model for predicting patient admissions at emergency department using triage notes</article-title><source>2023 IEEE International Conference on Big Data (BigData)</source><publisher-name>IEEE</publisher-name><fpage>4843</fpage><lpage>4847</lpage><pub-id pub-id-type="doi">10.1109/BigData59044.2023.10386753</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Ribeiro</surname><given-names>MT</given-names> </name><name name-style="western"><surname>Singh</surname><given-names>S</given-names> </name><name 
name-style="western"><surname>Guestrin</surname><given-names>C</given-names> </name></person-group><article-title>&#x201C;Why should I trust you?&#x201D;: explaining the predictions of any classifier</article-title><source>arXiv</source><comment>Preprint posted online on Aug 9, 2016</comment><pub-id pub-id-type="doi">10.48550/arXiv.1602.04938</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Examples of French nursing notes.</p><media xlink:href="ai_v4i1e57828_app1.docx" xlink:title="DOCX File, 16 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Analysis of performance evaluation on corrected test set.</p><media xlink:href="ai_v4i1e57828_app2.docx" xlink:title="DOCX File, 15 KB"/></supplementary-material><supplementary-material id="app3"><label>Multimedia Appendix 3</label><p>Comparative table of statistical results from previous studies.</p><media xlink:href="ai_v4i1e57828_app3.docx" xlink:title="DOCX File, 20 KB"/></supplementary-material><supplementary-material id="app4"><label>Multimedia Appendix 4</label><p>Comparative table of recall across PII categories from previous studies. PII: personal identifying information.</p><media xlink:href="ai_v4i1e57828_app4.docx" xlink:title="DOCX File, 12 KB"/></supplementary-material></app-group></back></article>