<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR AI</journal-id>
      <journal-title>JMIR AI</journal-title>
      <issn pub-type="epub">2817-1705</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v2i1e40755</article-id>
      <article-id pub-id-type="pmid">38875541</article-id>
      <article-id pub-id-type="doi">10.2196/40755</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Patient Embeddings From Diagnosis Codes for Health Care Prediction Tasks: Pat2Vec Machine Learning Framework</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>El Emam</surname>
            <given-names>Khaled</given-names>
          </name>
        </contrib>
        <contrib contrib-type="editor">
          <name>
            <surname>Malin</surname>
            <given-names>Bradley</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Sarejloo</surname>
            <given-names>Shirin</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Aslam</surname>
            <given-names>Muhammad Shahzad</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Boie</surname>
            <given-names>Sebastian Daniel</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Zhang</surname>
            <given-names>Wei</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Steiger</surname>
            <given-names>Edgar</given-names>
          </name>
          <degrees>DPhil</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Zi Data Science Lab</institution>
            <institution>Department IT and Data Science</institution>
            <institution>Central Research Institute of Ambulatory Health Care in Germany (Zi)</institution>
            <addr-line>Salzufer 8</addr-line>
            <addr-line>Berlin, 10587</addr-line>
            <country>Germany</country>
            <phone>49 40052485</phone>
            <email>esteiger@zi.de</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9937-4007</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Kroll</surname>
            <given-names>Lars Eric</given-names>
          </name>
          <degrees>DPhil</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6626-7600</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Zi Data Science Lab</institution>
        <institution>Department IT and Data Science</institution>
        <institution>Central Research Institute of Ambulatory Health Care in Germany (Zi)</institution>
        <addr-line>Berlin</addr-line>
        <country>Germany</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Edgar Steiger <email>esteiger@zi.de</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2023</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>21</day>
        <month>4</month>
        <year>2023</year>
      </pub-date>
      <volume>2</volume>
      <elocation-id>e40755</elocation-id>
      <history>
        <date date-type="received">
          <day>4</day>
          <month>7</month>
          <year>2022</year>
        </date>
        <date date-type="rev-request">
          <day>15</day>
          <month>11</month>
          <year>2022</year>
        </date>
        <date date-type="rev-recd">
          <day>9</day>
          <month>12</month>
          <year>2022</year>
        </date>
        <date date-type="accepted">
          <day>18</day>
          <month>3</month>
          <year>2023</year>
        </date>
      </history>
      <copyright-statement>©Edgar Steiger, Lars Eric Kroll. Originally published in JMIR AI (https://ai.jmir.org), 21.04.2023.</copyright-statement>
      <copyright-year>2023</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR AI, is properly cited. The complete bibliographic information, a link to the original publication on https://www.ai.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://ai.jmir.org/2023/1/e40755" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>In health care, diagnosis codes in claims data and electronic health records (EHRs) play an important role in data-driven decision making. Any analysis that uses a patient’s diagnosis codes to predict future outcomes or describe morbidity requires a numerical representation of this diagnosis profile made up of string-based diagnosis codes. These numerical representations are especially important for machine learning models. Most commonly, binary-encoded representations have been used, usually for a subset of diagnoses. In real-world health care applications, several issues arise: patient profiles show high variability even when the underlying diseases are the same, they may have gaps and not contain all available information, and a large number of appropriate diagnoses must be considered.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>We herein present Pat2Vec, a self-supervised machine learning framework inspired by neural network–based natural language processing that embeds complete diagnosis profiles into a small real-valued numerical vector.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>Based on German outpatient claims data with diagnosis codes according to the International Statistical Classification of Diseases and Related Health Problems, 10th Revision (ICD-10), we discovered an optimal vectorization embedding model for patient diagnosis profiles with Bayesian optimization for the hyperparameters. The calibration process ensured a robust embedding model for health care–relevant tasks by aggregating the metrics of different regression and classification tasks using different machine learning algorithms (linear and logistic regression as well as gradient-boosted trees). The models were tested against a baseline model that binary encodes the most common diagnoses. The study used diagnosis profiles and supplementary data from more than 10 million patients from 2016 to 2019 and was based on the largest German ambulatory claims data set. To describe subpopulations in health care, we identified clusters (via density-based clustering) and visualized patient vectors in 2D (via dimensionality reduction with uniform manifold approximation). Furthermore, we applied our vectorization model to predict prospective drug prescription costs based on patients’ diagnoses.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Our final models outperform the baseline model (binary encoding) with equal dimensions. They are more robust to missing data and show large performance gains, particularly in lower dimensions, demonstrating the embedding model’s compression of nonlinear information. In the future, other sources of health care data can be integrated into the current diagnosis-based framework. Other researchers can apply our publicly shared embedding model to their own diagnosis data.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>We envision a wide range of applications for Pat2Vec that will improve health care quality, including personalized prevention and signal detection in patient surveillance as well as health care resource planning based on subcohorts identified by our data-driven machine learning framework.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>electronic health records</kwd>
        <kwd>ICD</kwd>
        <kwd>machine learning</kwd>
        <kwd>health care</kwd>
        <kwd>data</kwd>
        <kwd>diagnosis</kwd>
        <kwd>model</kwd>
        <kwd>drug</kwd>
        <kwd>drug prescription</kwd>
        <kwd>performance</kwd>
        <kwd>applications</kwd>
        <kwd>quality</kwd>
        <kwd>prevention</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Public health surveillance and health care research in many countries depend on electronic health records (EHRs), including claims data [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref4">4</xref>]. In these records, patients’ medical diagnoses are often coded according to a string-based disease classification convention, for example, the International Statistical Classification of Diseases and Related Health Problems, 10th Revision (ICD-10) [<xref ref-type="bibr" rid="ref5">5</xref>]. Their sequence of ICD codes characterizes the medical history of every patient.</p>
      <p>Common tasks in clinical, epidemiological, or health care research on claims data expect numerical input (eg, regression and classification tasks such as linear or logistic regression or advanced machine learning tools such as gradient-boosted trees and deep learning). These methods are often used to predict specific health outcomes [<xref ref-type="bibr" rid="ref6">6</xref>-<xref ref-type="bibr" rid="ref17">17</xref>] or the utilization of health care institutions [<xref ref-type="bibr" rid="ref18">18</xref>-<xref ref-type="bibr" rid="ref22">22</xref>].</p>
      <p>To derive numerical input for these methods from the string-based diagnosis profiles, a procedure called binary encoding (or binarization, one-hot encoding) is most often used [<xref ref-type="bibr" rid="ref6">6</xref>-<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref15">15</xref>-<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref20">20</xref>-<xref ref-type="bibr" rid="ref24">24</xref>]. Using binary encoding, diagnoses are represented numerically by either 1 or 0, if the patient had or did not have the chosen diagnosis, respectively. As the pool of possible diagnostic codes is vast, binary encoding usually relies on a selected subset of diagnoses chosen by either field experts [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref16">16</xref>] or data-driven feature selection [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref17">17</xref>]. Diagnoses can also be represented by the number of times they appear [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref26">26</xref>]. Most often, they are pooled into clinical groups before further analysis [<xref ref-type="bibr" rid="ref18">18</xref>-<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref27">27</xref>-<xref ref-type="bibr" rid="ref29">29</xref>].</p>
      <p>Ideally, a disease classification such as ICD-10 would only cover clearly distinguishable medical conditions and concepts, but in reality, we have to deal with overlaps and uncertainties. Therefore, a faithful numerical representation of the patient’s medical history needs to take into account that different ICD codes may represent similar or even identical underlying issues. Frequently, computational and methodological constraints limit the number of diagnoses and interaction effects that can be considered. Binary encoding suffers in this regard, as it considers medical diagnoses as distinctive and unrelated features. As such, it limits the methodical progress of prediction tasks on claims data, especially the application of advanced machine learning methods. Thus, other methods of numerical representation of ICD diagnosis codes should be investigated to enable better individual health care and more precise prediction of health care demand.</p>
      <p>We investigate herein how a real-valued numerical representation (or vectorization, embedding) (see Chapter 15 in [<xref ref-type="bibr" rid="ref30">30</xref>]) of patients’ medical diagnosis profiles that uses their whole diagnostic ICD profiles can be derived. This embedding should compress the information from up to 14,877 possible 5-digit International Statistical Classification of Diseases and Related Health Problems, 10th Revision, German Modification (ICD-10-GM) 2019 [<xref ref-type="bibr" rid="ref31">31</xref>] codes, improve the performance of common health care prediction tasks, and let advanced (nonlinear) machine learning methods reach their full potential when used on claims data.</p>
      <p>To find such an embedding, we employ a self-supervised machine learning algorithm inspired by natural language processing (NLP), namely, Doc2Vec [<xref ref-type="bibr" rid="ref32">32</xref>], which itself is an extension of Word2Vec [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref34">34</xref>]. It has been applied to nonlanguage-specific tasks before [<xref ref-type="bibr" rid="ref35">35</xref>-<xref ref-type="bibr" rid="ref37">37</xref>]. Many studies [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref38">38</xref>-<xref ref-type="bibr" rid="ref42">42</xref>] have investigated embeddings of the ICD codes themselves, whereas some [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref42">42</xref>] arrived at patient-level embeddings for specific prediction tasks (Supplementary Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Here, we want to broaden the scope of the possible applications to general health care–related questions. It has been shown that hyperparameter tuning for Word2Vec and Doc2Vec can lead to considerably better results, especially on nonlanguage-related tasks [<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref37">37</xref>]. As such, we employ a Bayesian search on a hyperparameter grid to identify an optimal model for the vector embedding procedure. We evaluate our embedding model on broad health care prediction tasks with standard (linear and logistic regression) and advanced machine learning techniques (gradient-boosted trees). We also test how well the vectorization works with smaller data sets and how well it handles missing data with random data dropout sampling. In addition, we inspect the results visually in a 2D projected space along with a clustering of the embedded patient profiles to reveal the properties of our cohort. Finally, we evaluate the resulting vectorization model for the health care–relevant task of predicting drug spending at the patient level.</p>
      <p>Our method gave better results than binary encoding, but only after tuning the hyperparameters and on large enough data sets. The compression of the information of thousands of ICD-10 codes into a vector space of no more than 100 dimensions was achieved. We observed large performance gains using gradient-boosted trees with the vector embedding over classic linear or logistic regression with binary-encoded data. In addition, the vectorization models are more robust to missing data than baseline binary encoding. The final model learned on our extensive data can be shared and used by other stakeholders on much smaller data sets (eg, for supervised machine learning methods that predict clinical or other health care outcomes).</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Data</title>
        <p>The diagnosis data are based on comprehensive nationwide outpatient claims data from 2016 to 2019 of all patients with statutory health insurance (SHI) in Germany. According to the Federal Statistical Office [<xref ref-type="bibr" rid="ref43">43</xref>], there were 73,009,237 persons eligible for the SHI (87.8% of the population) in 2019. The pseudonymous data include diagnoses for all people in Germany with SHI who visited an outpatient physician in 2016 or later. Among others, the data include demographic characteristics such as age and gender, as well as diagnoses with markers of certainty and other billing-relevant information. These data do not contain information on inpatient treatment in hospitals. Diagnoses are coded according to the ICD-10-GM [<xref ref-type="bibr" rid="ref31">31</xref>]. In addition to the diagnosis data, we extracted individual information on prescribed and dispensed medications from the pseudonymous data of nationwide outpatient drug prescriptions. The claims data and the prescription data are linked by patient information (compare [<xref ref-type="bibr" rid="ref44">44</xref>]).</p>
        <p>We chose N=11,200,000 patients at random from the full population of people with SHI because technical limitations make it impossible to use the full data. To achieve this study sample size, we shuffled all patients in the claims database randomly and selected the top N records for the sample. All patients with at least one data entry after 2016 were eligible. The sample is divided into 4 data sets by random subsampling from the study population (<xref ref-type="boxed-text" rid="box1">Textbox 1</xref>).</p>
        <p>These samples were filtered for patients with consistent information regarding gender and age during the years considered for analysis (2016 to 2019). The training data in (1) for the vectorization model were restricted to ICD-10 codes (5-digit notation) from 2016 to 2018, whereas the calibration, validation, and test sets in (2)-(4) were restricted to codes from 2018. Only patients with at least one confirmed diagnosis during the period in question were kept. This left us with sample sizes of 8,941,773 (vectorization training), 830,285 (calibration training), 82,924 (validation), and 82,937 (test); see <xref rid="figure1" ref-type="fig">Figure 1</xref>.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Flowchart of data sampling and algorithmic schematic. Patient data flows are represented by solid, straight lines, while machine learning models and other meta-information flows are represented by dashed, curved lines. Rectangles are patient data, while hexagons are algorithms or analysis methods. AUROC: area under the receiver operating characteristic curve; ML: machine learning; SHI: statutory health insurance.</p>
          </caption>
          <graphic xlink:href="ai_v2i1e40755_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>Because of the regulations of the German health care system (see “The German Health Care System” in [<xref ref-type="bibr" rid="ref45">45</xref>], or a more detailed description of the German system in [<xref ref-type="bibr" rid="ref46">46</xref>]), diagnoses are available on a quarterly basis (but without temporal order within a quarter), with reference to cases and places of treatment. As such, we generated a sequence of codes for each patient with a certain temporal order: confirmed diagnoses are grouped by case and place of treatment, and these groups are ordered by temporal succession of quarters, but if more than 1 group appears within a single quarter, these groups are shuffled randomly within the quarter (as well as diagnoses within a group).</p>
        <p>Furthermore, when training the model (see below), only diagnoses that were seen at least 100 times in the training data were taken into account.</p>
        <p>As health care–relevant outcomes in (2)-(4), we used 4 different quantities for calibration: the <italic>number of cases</italic> (a proxy indicator for the number of medical consultations), (ambulatory) <italic>emergency health care utilization</italic>, <italic>age</italic>, and <italic>gender</italic>. The number of cases in 2019 is approximate due to data limitations: a case is defined as the unique combination of a quarter, a patient, a treating medical facility, the billing association of SHI physicians, and the time stamp of data processing. The binary outcome of emergency health care utilization is 1 if at least one case in 2019 of the respective patient was billed as an emergency, and 0 otherwise. The sociodemographic variables age (in years) and gender (binary-encoded) were also extracted from the data.</p>
        <p>As data for robustness analysis against diagnosis dropout, we randomly dropped 10%, 25%, or 50% of diagnosis codes for each patient (rounded to the nearest integer, while always keeping at least one code).</p>
        <p>As data for robustness analysis against varying training data set sizes, we used different percentages of the original vectorization training data (reducing the vectorization data from 10 million patients to 10,000 patients).</p>
        <p>For a further analysis, we extracted the drug prescription costs from the ambulatory drug prescription data of residents in Germany with SHI. These costs are the total (in euros) of all billed prescribed drugs for the respective patient in 2019 (if any, otherwise 0).</p>
        <boxed-text id="box1" position="float">
          <title>Data sets obtained by random subsampling from the study population.</title>
          <list list-type="order">
            <list-item>
              <p>
                <bold>Vectorization</bold>
              </p>
              <p>A total of 10,000,000 patients as a vectorization training set for self-supervised machine learning to learn a model for numerical representation (embedding) of patients’ profiles.</p>
            </list-item>
            <list-item>
              <p>
                <bold>Calibration</bold>
              </p>
              <p>A total of 1,000,000 patients with embeddings based on a model from (1) serving as a calibration training set for supervised machine learning on prediction tasks.</p>
            </list-item>
            <list-item>
              <p>
                <bold>Validation</bold>
              </p>
              <p>A total of 100,000 patients with embeddings based on a model from (1) serving as a validation set for the calibration prediction models learned in (2) and, in turn, hyperparameter tuning of vectorization in (1).</p>
            </list-item>
            <list-item>
              <p>
                <bold>Test</bold>
              </p>
              <p>A total of 100,000 patients as a test set for final analysis and presentation of the results.</p>
            </list-item>
          </list>
        </boxed-text>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>The use of claims data for this analysis is governed by the German Code of Social Law (SGB X 80 in conjunction with SGB V 68c): our study aims to improve health care quality by exploring diagnosis profiles and predicting health care–relevant outcomes. While approval and consent of individual human patients within the cohort are operationally impossible to acquire, they are also not required by the German Code of Social Law as we used deidentified, routinely collected data in a retrospective study. In addition, we argue that the conclusions we can draw from our analyses are in the best interest of patients and will improve future public health services.</p>
      </sec>
      <sec>
        <title>Binary Encoding and Baseline Model</title>
        <p>Binary encoding creates a data matrix with rows for patients and columns for variables. Each variable represents one of the diagnoses being looked at (out of a chosen subset of all available diagnoses) and is given a 1 in the corresponding row and column if the patient had that diagnosis and a 0 if they did not.</p>
        <p>Here, we employ such a binary encoding approach as a baseline model: First, we sorted all confirmed unique ICD-10 diagnosis codes from 2019 by the number of patients with this diagnosis in the data. Second, for a given number M of top diagnoses and the sample patients from above, we formed the appropriate data matrix with M columns corresponding to the top M diagnoses and each row representing a patient, using binary encoding as described above. This is the baseline model for numerization of the diagnosis codes and will be compared with the real-valued patient-level embedding described in the next section.</p>
      </sec>
      <sec>
        <title>ICD2Vec and Pat2Vec</title>
        <p>Similar to [<xref ref-type="bibr" rid="ref14">14</xref>], we used an advanced approach to a real-valued embedding of diagnosis codes, applying a method from NLP called Word2Vec and its extension Doc2Vec [<xref ref-type="bibr" rid="ref32">32</xref>-<xref ref-type="bibr" rid="ref34">34</xref>]. Trained on a corpus of text data, Word2Vec vectorizes individual words and keeps their semantic meaning by mapping similar or related words to similar vectors (according to multidimensional distance measures in a Euclidean space) and antagonistic words to diverging vectors. As an extension to Word2Vec, the Doc2Vec algorithm also learns vectors for each document, such that similar documents are represented by similar vectors.</p>
        <p>Word2Vec is in fact a (shallow) neural network in the sense that individual words are represented by vectors (embeddings) of a fixed size, and the entries of these vectors are used directly to predict the vectors of other words in a single-layer neural network; that is, the embeddings are themselves the parameters of the single hidden layer. Word2Vec goes over every word in each document step-by-step and repeatedly during training and updates the neural network’s parameters (or rather, the embeddings) by either predicting from the current word the neighboring or context words as targets (skip-gram) or predicting a target word from the neighboring or context words (continuous bag of words) [<xref ref-type="bibr" rid="ref33">33</xref>]. In both cases, the update to the network’s parameters after training on a single word would include updating all parameters for all words that are not in the context. For computational efficiency (because of large vocabularies), this is circumvented by either updating only some negative examples of words that are not in the context of the word under consideration [<xref ref-type="bibr" rid="ref34">34</xref>] or by applying a hierarchical softmax to the network update [<xref ref-type="bibr" rid="ref33">33</xref>]. In fact, it is also possible to apply both techniques at the same time.</p>
        <p>Doc2Vec is an extension to the Word2Vec algorithm in the sense that it is applied in parallel to Word2Vec. Additionally, while learning the vector embeddings of every word in the corpus, the vector embeddings of the documents that form the corpus are learned in the same manner. Doc2Vec can be trained in 2 different ways [<xref ref-type="bibr" rid="ref32">32</xref>]: either with “distributed memory” (DM; similar to Word2Vec’s continuous bag of words), where each target word from the document is predicted using both the context words and the document’s embedding, or with “distributed bag of words” (DBOW; similar to Word2Vec’s skip-gram), where target words from the document are predicted using the document itself and separately updating the context words.</p>
        <p>For more background on neural networks and how they are applied to NLP tasks, see [<xref ref-type="bibr" rid="ref47">47</xref>] and [<xref ref-type="bibr" rid="ref48">48</xref>].</p>
        <p>In our framework, we treat every ICD-10 diagnosis code as a word and the sequence of diagnosis codes for a patient as a document. These documents are our corpus data for training ICD2Vec (by applying Word2Vec to ICD-10 codes) and Pat2Vec (by applying Doc2Vec to patients’ sequences of diagnosis codes).</p>
        <p>For training the 2Vec algorithms, we have to choose a vector size of M (among other parameters; see below). Pat2Vec is trained on the patients’ sample data and then gives us a data matrix with M columns, where each row or patient is a vector of length M (the embedding of the corresponding patient), encoding <italic>all</italic> of their diagnoses. Additionally, we obtain in parallel a vectorization of the ICD-10 codes themselves (Word2Vec/ICD2Vec), where each code is represented by a vector.</p>
      </sec>
      <sec>
        <title>Hyperparameter Tuning</title>
        <p>The 2Vec algorithms need several parameters as input for the training of the vectorization model. These are referred to as hyperparameters and have different considered ranges (<xref ref-type="boxed-text" rid="box2">Textbox 2</xref>).</p>
        <p>Following previous research [<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref37">37</xref>], we tuned the hyperparameters for the vectorization model using a Bayesian hyperparameter optimization [<xref ref-type="bibr" rid="ref49">49</xref>] over the ranges given in <xref ref-type="boxed-text" rid="box2">Textbox 2</xref>. We calibrated and validated the resulting vectorization models with supervised machine learning (see the next section) using the holdout calibration and validation data on the 4 calibration outcomes.</p>
        <boxed-text id="box2" position="float">
          <title>Hyperparameters and their ranges.</title>
          <list list-type="order">
            <list-item>
              <p>
                <bold>Vector size (100)</bold>
              </p>
              <p>Length of the vector assigned to each patient. We hold this fixed while tuning the hyperparameters, but we will vary this value afterward for comparisons.</p>
            </list-item>
            <list-item>
              <p>
                <bold>Minimal count (100)</bold>
              </p>
              <p>Only diagnoses that appear at least 100 times in the data are considered. This threshold serves anonymization purposes, as rare diseases could otherwise make individual patients identifiable. We will not optimize this parameter.</p>
            </list-item>
            <list-item>
              <p>
                <bold>Window size (1-10)</bold>
              </p>
              <p>Describes how many of the neighboring codes will be considered in each training step within the 2Vec algorithm and a given sequence of codes.</p>
            </list-item>
            <list-item>
              <p>
                <bold>Downsampling</bold>
              </p>
              <p>Smaller values of the downsampling parameter mean that more of the most common words will be randomly excluded from the training data (default 0.001). After preliminary analysis, we observed that downsampling is always detrimental to our task, so we did not downsample our data.</p>
            </list-item>
            <list-item>
              <p>
                <bold>Epochs (1-20)</bold>
              </p>
              <p>The number of training epochs describes how many times each patient’s code sequence will be looked at to update the vectorization model.</p>
            </list-item>
            <list-item>
              <p>
                <bold>Negative sampling (0-20)</bold>
              </p>
              <p>For each update of a word and its neighboring words (within the window size range), this gives the number of random words not within the window that will be updated as negative examples; 0 for no negative sampling.</p>
            </list-item>
            <list-item>
              <p>
                <bold>Negative sampling exponent (–5 to 5)</bold>
              </p>
              <p>Smoothing exponent for the updates of the negative samples.</p>
            </list-item>
            <list-item>
              <p>
                <bold>Hierarchical softmax (Boolean)</bold>
              </p>
              <p>This parameter describes how the network parameters will be updated at the end of each training step; true for hierarchical softmax and false for no hierarchical softmax.</p>
            </list-item>
            <list-item>
              <p>
                <bold>Distributed memory or distributed bag of words (Boolean)</bold>
              </p>
              <p>Training of document vectors in either distributed memory (DM) or distributed bag of words (DBOW) fashion (see above); true for DM and false for DBOW.</p>
            </list-item>
            <list-item>
              <p>
                <bold>Alpha (0.001-0.1)</bold>
              </p>
              <p>Learning rate of the neural network updates.</p>
            </list-item>
          </list>
        </boxed-text>
      </sec>
      <sec>
        <title>Regression and Classification Methods</title>
        <sec>
          <title>Overview</title>
          <p>The data matrices generated by binary encoding or Pat2Vec served as input data for prediction algorithms on the 4 calibration outcomes (number of cases, emergency health care utilization, age, and gender). The employed algorithms are described below, where LightGBM refers to the light gradient-boosted machine algorithm [<xref ref-type="bibr" rid="ref50">50</xref>].</p>
        </sec>
        <sec>
          <title>Regression</title>
          <p>For the real-valued count outcomes of age and number of cases, we employed 2 different regression techniques: linear regression and an ensemble decision tree–based regression algorithm with gradient boosting (LightGBM Regressor) [<xref ref-type="bibr" rid="ref50">50</xref>-<xref ref-type="bibr" rid="ref52">52</xref>]. We chose LightGBM over other gradient-boosted tree methods because of its performance and fast training time [<xref ref-type="bibr" rid="ref50">50</xref>,<xref ref-type="bibr" rid="ref53">53</xref>,<xref ref-type="bibr" rid="ref54">54</xref>]. Linear regression does not have additional input parameters; LightGBM was used out of the box without parameter optimization. The goodness of fit was measured by the <italic>R</italic><sup>2</sup> and 1 minus the relative mean absolute error (also known as Cumming prediction measure [CPM]) [<xref ref-type="bibr" rid="ref55">55</xref>].</p>
        </sec>
        <sec>
          <title>Classification</title>
          <p>For the binary outcomes of gender and emergency usage, we employed 2 different classification techniques: logistic regression and an ensemble decision tree–based classification algorithm with gradient boosting (LightGBM Classifier) [<xref ref-type="bibr" rid="ref50">50</xref>,<xref ref-type="bibr" rid="ref52">52</xref>,<xref ref-type="bibr" rid="ref56">56</xref>]. Logistic regression does not have additional input parameters; LightGBM was used out of the box without parameter optimization. The goodness of fit was measured by the area under the receiver operating characteristic curve and the area under the precision-recall curve.</p>
        </sec>
      </sec>
      <sec>
        <title>Final Model</title>
        <p>The final model was chosen with Bayesian optimization of the hyperparameters by aggregating the 16 performance measures: 2 approaches with linear/logistic regression and gradient-boosted trees, and 2 measures for each of the 4 outcomes (<italic>R</italic><sup>2</sup> and CPM for regression, area under the receiver operating characteristic curve and area under the precision-recall curve for classification). All of these measures lie between 0 and 1, with higher values indicating better performance, but they vary in size and range between the 4 different outcomes and measures. As such, we took the performance measure values of the top 100 diagnoses baseline model as reference values. For each trial in the Bayesian optimization and its respective vectorization model, we calculated the 16 performance measures and divided them by the respective reference value from the top 100 diagnoses baseline model. We then aggregated these ratios by calculating their arithmetic mean as a total score (ie, this gives a reference score of 1 for the top 100 diagnoses baseline model). The final model was chosen based on the best total score after this aggregation (<xref rid="figure1" ref-type="fig">Figure 1</xref>).</p>
        <p>We then trained embedding models with the same hyperparameter configuration as the final model, but with different vector sizes M. Likewise, we derived the binary encoding matrices of the top M diagnoses for varying sizes of M. These embedding and binarization models were compared on the same prediction tasks described above on the holdout test data. The same procedures were replicated on the different data sets for robustness analysis (diagnosis dropout and reduced training data size, respectively).</p>
        <p>Additionally, we conducted an exploratory and visual analysis of the vector embeddings from the Pat2Vec vectorization on the test data. To this end, we projected the 100D patient vector embeddings into 2 dimensions using the uniform manifold approximation and projection (UMAP) algorithm [<xref ref-type="bibr" rid="ref57">57</xref>]. In addition, these projections were clustered using hierarchical density–based clustering (hierarchical density–based spatial clustering of applications with noise [HDBSCAN]) [<xref ref-type="bibr" rid="ref58">58</xref>]. We assessed the general demographic and health care properties of the clusters and identified overexpressed ICD-10 codes within each cluster as the codes that have the largest positive difference in their share within the respective cluster compared with their share in the general population. As an explainability analysis, we analyzed how ICD-10 diagnosis codes are associated with specific dimensions of the vector embedding of size 100. To this end, we calculated correlations over all patients in the test data between a subset of 60 relevant ICD-10 diagnosis codes, binary encoded per patient, and the 100 vector dimensions.</p>
        <p>Furthermore, we predicted drug spending costs using the final embedding model with a vector size of 100 and the baseline model. We compared the performance (<italic>R</italic><sup>2</sup>, mean absolute error, and CPM), again with linear regression and the gradient-boosted trees algorithm for regression (LightGBM Regressor). We also added age and gender as additional predictors to these models. Here, we tuned the hyperparameters of the LightGBM method using Bayesian optimization to achieve its full potential.</p>
      </sec>
      <sec>
        <title>Software</title>
        <p>Analysis was conducted primarily in the Python programming language (Python Software Foundation) [<xref ref-type="bibr" rid="ref59">59</xref>], with additional analyses in the R statistical programming language (The R Foundation) [<xref ref-type="bibr" rid="ref60">60</xref>]. Pat2Vec was implemented using the Gensim package [<xref ref-type="bibr" rid="ref61">61</xref>] for Python with hyperparameter tuning via the Optuna package [<xref ref-type="bibr" rid="ref62">62</xref>]. Machine learning prediction tasks were conducted with scikit-learn (linear and logistic regression) [<xref ref-type="bibr" rid="ref63">63</xref>] and the LightGBM Python package [<xref ref-type="bibr" rid="ref50">50</xref>], while 2D projection and clustering were based on the UMAP package [<xref ref-type="bibr" rid="ref57">57</xref>] and the HDBSCAN package [<xref ref-type="bibr" rid="ref58">58</xref>], respectively. Final visualizations were prepared in R with the ggplot2 package [<xref ref-type="bibr" rid="ref64">64</xref>].</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Sample Characteristics</title>
        <p>After filtering the original sample of 11,200,000 patients, the data were limited to 9,937,919 patients. The average age of the patients was 45.2 years; 54.60% (5,426,481/9,937,919) of the cohort were female. The average number of cases per patient in 2019 was 8.4. About 18.32% (1,820,736/9,937,919) of the cohort had at least one emergency in 2019. The average drug spending in 2019 was €632.1 (US $683.4). The average number of diagnosis codes from 2016 to 2018 (relevant for the training data) was 67.6, whereas the average number of codes in 2018 only (relevant for prediction tasks) was 34.6. Variance was very high on the variable drug spending, with an SD of 4383.9 (<xref ref-type="table" rid="table1">Table 1</xref>). Furthermore, we observed a high number of patients with a 0 value in drug spending in 2019 (2,132,938/9,937,919, 21.46%, patients).</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Patients’ data characteristics.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="500"/>
            <col width="500"/>
            <thead>
              <tr valign="top">
                <td>Characteristics</td>
                <td>Values</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Age (years), mean (SD)</td>
                <td>45.2 (24.1)</td>
              </tr>
              <tr valign="top">
                <td>Female gender, n/N (%)</td>
                <td>5,426,481/9,937,919 (54.60)</td>
              </tr>
              <tr valign="top">
                <td>Number of cases, mean (SD)</td>
                <td>8.4 (6.7)</td>
              </tr>
              <tr valign="top">
                <td>Emergency in 2019, n/N (%)</td>
                <td>1,820,736/9,937,919 (18.32)</td>
              </tr>
              <tr valign="top">
                <td>Drug cost (€<sup>a</sup>), mean (SD)</td>
                <td>632.1 (4383.9)</td>
              </tr>
              <tr valign="top">
                <td>Number of codes from 2016-2018, mean (SD)</td>
                <td>67.6 (92.4)</td>
              </tr>
              <tr valign="top">
                <td>Number of codes in 2018, mean (SD)</td>
                <td>34.6 (45.5)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>€1=US $1.08 (as of March 27, 2023).</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Top M Diagnosis Codes</title>
        <p>The baseline model was constructed from a binary encoding of the top M diagnosis codes, for varying numbers of M. The most prevalent diagnosis code was I10.90 (hypertension; 2,591,336/9,937,919, 26.08%, patients), followed by J06.9 (unspecified acute upper respiratory infection) and Z12.9 (unspecified special screening for neoplasms used in the various German cancer screening programs [<xref ref-type="bibr" rid="ref65">65</xref>]). Many patients have at least one of the top diagnoses (eg, 8,947,182/9,937,919, 90.03%, of patients have at least one of the most prevalent diagnoses). By contrast, over 2000 unique diagnosis codes make up the bulk of the diagnoses, with a share of over 90% of all diagnosis codes (317,316,756/343,751,225, 92.31%) in the data (Supplementary Table S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
      </sec>
      <sec>
        <title>Hyperparameter Tuning Results</title>
        <p>The Bayesian optimization search for the best hyperparameter configuration revealed that the default parameters are not sufficient and can be greatly improved upon (<xref rid="figure2" ref-type="fig">Figure 2</xref>). The performance of the default parameter configuration did not exceed that of the top M diagnoses baseline model.</p>
        <p>The most important hyperparameters (Supplementary Figure S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) were (in order): the choice of DBOW over DM, the number of epochs (choosing 3), the negative sampling exponent (choosing approximately –2.3, compared with the default [0.75]), and the learning rate alpha (choosing approximately 0.0014, compared with the default [0.025]).</p>
        <p>When compared with the top M diagnoses approach with M=100, the final set of parameters with a vector size of 100 resulted in a 9 percentage point increase on our aggregated performance metric. All final models with a vector size of 10 or larger increased performance over this baseline model of the top 100 diagnoses. For smaller vector sizes, the gains in performance compared with the baseline models of equal size were larger (<xref rid="figure2" ref-type="fig">Figure 2</xref>). After a vector size of about 50, the performance of the vectorization increased by lesser amounts.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>A comparison of the default vectorization model, the baseline model (the top M diagnoses), and the final model after hyperparameter tuning based on the total score of how well they did on prediction tasks.</p>
          </caption>
          <graphic xlink:href="ai_v2i1e40755_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Linear/Logistic Regression Versus Gradient-Boosted Trees</title>
        <p>The ensemble-based machine learning with LightGBM Regressor/Classifier on the final vectorization model performed better than the linear and logistic regression counterparts on the vectorization data as well as the top M diagnoses data (Supplementary Figure S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Additionally, we observed a bigger increase in performance by switching from top M diagnoses data to Pat2Vec-derived vectors on smaller vector sizes, which stresses that information is compressed well by the vectorization. Furthermore, up to a vector size of about 100, the vectorization data with linear/logistic regression or LightGBM outperformed even the LightGBM approach on the binary-encoded data, which indicates that nonlinear properties of the patient profiles were encoded in the vector embeddings. In summary, using gradient-boosted trees or vector embeddings is always beneficial, and the combination of the 2 yields the best results.</p>
      </sec>
      <sec>
        <title>Robustness Analysis</title>
        <sec>
          <title>Diagnosis Dropout</title>
          <p>As a sensitivity or robustness analysis of the vector embedding (and the baseline binary encoding), we calculated total scores on the reduced dropout data (with 10%, 25%, and 50% of diagnosis codes missing, respectively). We observed a steeper decrease for the binary-encoded top 100 diagnoses data, while the performance of the vectorization suffered only mildly even with a 50% dropout of the diagnosis data (Supplementary Figure S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
        </sec>
        <sec>
          <title>Vectorization Training Data Sample Size</title>
          <p>As an additional robustness analysis of the vector embedding with regard to necessary training data size, we calculated total scores on reduced vectorization training data, from 100% (the original 10 million patients’ training data) to 0.1% of the original training data, or 10,000 patients. We observed a total score above 1 (thus, above the performance of the binary-encoded baseline model) for sample sizes as low as 0.5% of the original data, or 50,000 patients (Supplementary Figure S4 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>), while sample sizes of at least 1 million patients are needed to achieve total scores close to the total score on the original data.</p>
        </sec>
      </sec>
      <sec>
        <title>Analysis of Patient Embedding</title>
        <p>For visualization purposes, we projected the final vectorization model with a vector size of 100 into 2 dimensions using the UMAP algorithm. This way we were able to illustrate the high-dimensional vectorization and patterns within the patients’ cohort (<xref rid="figure3" ref-type="fig">Figures 3</xref> and <xref rid="figure4" ref-type="fig">4</xref>).</p>
        <p>We observed a triangular shape in the vector space of the embedded patient profiles, with multiple regions of higher density. The 3 corner areas are (1) young patients of both genders with a low number of cases and low prescription costs; (2) women with an average age below the average age of the cohort and with low prescription costs and a medium number of cases; and (3) elderly patients of both genders with a high number of cases and high prescription costs (<xref rid="figure3" ref-type="fig">Figure 3</xref>). The HDBSCAN clustering identified 14 clusters but showed that many patients are not easily mapped to a cluster (42,024/82,937, 50.67%, of test data; <xref rid="figure4" ref-type="fig">Figure 4</xref>).</p>
        <p>A closer inspection of the clusters revealed interesting patterns in the subcohorts (<xref rid="figure4" ref-type="fig">Figure 4</xref> and <xref ref-type="table" rid="table2">Table 2</xref>; also see <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref> for further details). The clusters 5, 13, and 14 all have a mean age of almost 70 years or older, but differ in the share of females, mean number of cases, rate of emergency cases, and drug spending costs. Among these clusters, cluster 13 is the oldest with distinctive ICD-10 diagnoses of F03 (dementia) and R32 (urinary incontinence), along with a large number of patients who do not appear in 2019’s data, which indicates a high mortality within cluster 13. Clusters 5 and 6 have the most distinctive diagnosis codes in the H52 section (refractive errors/eyesight), but differ in their average age. Clusters 1 and 2 are almost exclusively female and of around the same mean age, but cluster 1 has a higher share of emergencies, and overexpressed ICD code Z34 (supervision of normal pregnancy) and section O09 (duration of pregnancy) point to pregnancy. Clusters 11 and 8 are the 2 youngest clusters, where cluster 11 is mostly characterized by routine examinations and vaccinations (Z00.1: routine child health examination; Z23.8 and Z27.8: immunizations), whereas cluster 8 is characterized by developmental disorders of speech and language (F80.9 and F80.0). Patients in cluster 12 have the most common acute ambulatory diseases (J06.9: acute upper respiratory infection; A09.9: gastroenteritis/colitis; and R51: headache). The remaining clusters show the other most prominent public health concerns in the German ambulatory health care system: cluster 3 (hay fever/asthma), cluster 4 (hypothyroidism), cluster 7 (depressive disorders), cluster 9 (pinched nerve/back pain/disc disorders), and cluster 10 (diabetes type 2).</p>
        <p>Regarding the explainability or backward interpretation of our embedding, we analyzed how specific ICD-10 diagnosis codes map onto the patient vector dimensions. A heatmap of the correlations between a subset of 60 diagnosis codes and the 100D embedding showed that similar disease concepts were mapped to the same vector dimensions in a blockwise manner (Supplementary Figure S5 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). It also showed that disease information was spread out over multiple dimensions instead of being mapped to only 1 dimension as in binary encoding.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>UMAP embedding of Pat2Vec, colored by age/gender/number of cases in 2019/emergency treatment in 2019/last available year in claims data/drug prescription costs in 2019. f: female; m: male; UMAP: uniform manifold approximation and projection.</p>
          </caption>
          <graphic xlink:href="ai_v2i1e40755_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>UMAP embedding of Pat2Vec; numbers 1-14 indicate clusters found by HDBSCAN. HDBSCAN: hierarchical density–based spatial clustering of applications with noise; UMAP: uniform manifold approximation and projection.</p>
          </caption>
          <graphic xlink:href="ai_v2i1e40755_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Properties of clustered patients’ cohorts.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="100"/>
            <col width="120"/>
            <col width="120"/>
            <col width="120"/>
            <col width="120"/>
            <col width="120"/>
            <col width="120"/>
            <col width="180"/>
            <thead>
              <tr valign="bottom">
                <td>Cluster</td>
                <td>Percentage of cohort</td>
                <td>Mean age (years)</td>
                <td>Female, %</td>
                <td>Mean number of cases</td>
                <td>Emergency, %</td>
                <td>Mean drug spending (€<sup>a</sup>)</td>
                <td>Distinctive ICD-10<sup>b</sup> codes</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>11</td>
                <td>3.8</td>
                <td>4.1</td>
                <td>50.4</td>
                <td>4.8</td>
                <td>35.2</td>
                <td>69.26</td>
                <td>Z00.1, Z23.8, Z27.8</td>
              </tr>
              <tr valign="top">
                <td>8</td>
                <td>1.5</td>
                <td>9.4</td>
                <td>35.9</td>
                <td>5.7</td>
                <td>27.1</td>
                <td>198.01</td>
                <td>F80.9, F80.0, Z00.1</td>
              </tr>
              <tr valign="top">
                <td>6</td>
                <td>1.1</td>
                <td>21.7</td>
                <td>49.0</td>
                <td>5.3</td>
                <td>21.8</td>
                <td>62.77</td>
                <td>H52.2, H52.0, H52.1</td>
              </tr>
              <tr valign="top">
                <td>12</td>
                <td>6.7</td>
                <td>27.6</td>
                <td>31.3</td>
                <td>4.6</td>
                <td>19.8</td>
                <td>175.77</td>
                <td>J06.9, A09.9, R51</td>
              </tr>
              <tr valign="top">
                <td>1</td>
                <td>1.7</td>
                <td>32.0</td>
                <td>99.9</td>
                <td>8.4</td>
                <td>28.4</td>
                <td>230.47</td>
                <td>Z34, N89.8, O09.3</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>4.0</td>
                <td>33.3</td>
                <td>38.1</td>
                <td>7.1</td>
                <td>19.1</td>
                <td>323.30</td>
                <td>J30.1, J45.9, J45.0</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>9.3</td>
                <td>33.7</td>
                <td>99.7</td>
                <td>8.6</td>
                <td>18.7</td>
                <td>130.00</td>
                <td>N89.8, Z30.9, Z12.9</td>
              </tr>
              <tr valign="top">
                <td>7</td>
                <td>2.6</td>
                <td>44.5</td>
                <td>57.1</td>
                <td>9.9</td>
                <td>19.0</td>
                <td>431.01</td>
                <td>F32.9, F32.1, F33.1</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>2.4</td>
                <td>48.6</td>
                <td>86.7</td>
                <td>9.9</td>
                <td>13.9</td>
                <td>191.26</td>
                <td>E03.9, E06.3, Z12.9</td>
              </tr>
              <tr valign="top">
                <td>9</td>
                <td>6.6</td>
                <td>57.6</td>
                <td>47.0</td>
                <td>10.4</td>
                <td>15.7</td>
                <td>592.98</td>
                <td>M54.1, M51.2, M54.5</td>
              </tr>
              <tr valign="top">
                <td>10</td>
                <td>3.7</td>
                <td>59.3</td>
                <td>37.3</td>
                <td>8.4</td>
                <td>11.5</td>
                <td>480.11</td>
                <td>I10.9, I10.90, E11.9</td>
              </tr>
              <tr valign="top">
                <td>5</td>
                <td>2.1</td>
                <td>69.9</td>
                <td>59.6</td>
                <td>10.9</td>
                <td>12.9</td>
                <td>809.16</td>
                <td>H52.2, H52.4, H52.0</td>
              </tr>
              <tr valign="top">
                <td>14</td>
                <td>2.6</td>
                <td>74.4</td>
                <td>37.4</td>
                <td>11.9</td>
                <td>16.0</td>
                <td>1587.98</td>
                <td>I10.9, I10.90, I25.1</td>
              </tr>
              <tr valign="top">
                <td>13</td>
                <td>1.3</td>
                <td>80.7</td>
                <td>62.9</td>
                <td>8.2</td>
                <td>26.6</td>
                <td>1248.64</td>
                <td>F03, R32, I10.9</td>
              </tr>
              <tr valign="top">
                <td>None</td>
                <td>50.7</td>
                <td>50.2</td>
                <td>51.0</td>
                <td>9.4</td>
                <td>17.9</td>
                <td>908.89</td>
                <td>N/A<sup>c</sup></td>
              </tr>
              <tr valign="top">
                <td>All</td>
                <td>100.0</td>
                <td>45.6</td>
                <td>54.5</td>
                <td>8.7</td>
                <td>18.7</td>
                <td>654.17</td>
                <td>N/A</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>€1=US $1.08 (as of March 27, 2023).</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>ICD-10: International Statistical Classification of Diseases and Related Health Problems, 10th Revision.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>N/A: not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Prediction of Drug Spending Costs</title>
        <p>Predicting prospective individual drug spending from diagnosis data is an especially hard task [<xref ref-type="bibr" rid="ref66">66</xref>]. We predicted 2019’s patient-level drug spending based on patients’ diagnosis codes from 2018. We used and compared the binary-encoded top 100 diagnoses and our vectorization of dimension 100 (Pat2Vec). In addition, we extended the data by age and gender of patients. <xref ref-type="table" rid="table3">Table 3</xref> shows the results using linear regression as well as gradient-boosted trees. We observed an overall high relative increase in performance by using the vectorization over the baseline model, while in general the <italic>R</italic><sup>2</sup> values were low. The linear regression shows diverging results between the top 100 and vectorization data with regard to absolute errors and squared errors (CPM and <italic>R</italic><sup>2</sup>). The gradient-boosted trees approach to regression performed similarly to the linear regression on the baseline model of binary-encoded top 100 diagnoses, while the combination of Pat2Vec and gradient-boosted trees performed best. Adding age and gender as additional variables led only to small increases in performance.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p><italic>R</italic><sup>2</sup>, mean absolute error, and Cumming prediction measure of predicting drug spending costs using linear regression and LightGBM Regressor.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="200"/>
            <col width="60"/>
            <col width="180"/>
            <col width="160"/>
            <col width="0"/>
            <col width="60"/>
            <col width="180"/>
            <col width="160"/>
            <thead>
              <tr valign="top">
                <td>Measure</td>
                <td colspan="4">Linear regression</td>
                <td colspan="3">LightGBM Regressor</td>
              </tr>
              <tr valign="bottom">
                <td>
                  <break/>
                </td>
                <td><italic>R</italic><sup>2</sup>, %</td>
                <td>Mean absolute error (€<sup>a</sup>)</td>
                <td>Cumming prediction measure, %</td>
                <td colspan="2"><italic>R</italic><sup>2</sup>, %</td>
                <td>Mean absolute error (€)</td>
                <td>Cumming prediction measure, %</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Age + gender</td>
                <td>1.0</td>
                <td>818.44</td>
                <td>7.4</td>
                <td colspan="2">1.1</td>
                <td>801.09</td>
                <td>9.4</td>
              </tr>
              <tr valign="top">
                <td>Top 100</td>
                <td>2.0</td>
                <td>760.55</td>
                <td>14.0</td>
                <td colspan="2">2.1</td>
                <td>755.76</td>
                <td>14.5</td>
              </tr>
              <tr valign="top">
                <td>Top 100 + age + gender</td>
                <td>2.0</td>
                <td>757.13</td>
                <td>14.4</td>
                <td colspan="2">2.4</td>
                <td>752.78</td>
                <td>14.9</td>
              </tr>
              <tr valign="top">
                <td>Pat2Vec</td>
                <td>7.7</td>
                <td>845.99</td>
                <td>4.3</td>
                <td colspan="2">12.9</td>
                <td>704.01</td>
                <td>20.4</td>
              </tr>
              <tr valign="top">
                <td>Pat2Vec + age + gender</td>
                <td>7.7</td>
                <td>845.98</td>
                <td>4.3</td>
                <td colspan="2">13.7</td>
                <td>690.70</td>
                <td>21.9</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>€1=US $1.08 (as of March 27, 2023).</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>We found that the NLP-based vector embeddings of claims data led to large improvements on health care–related prediction tasks compared with standard approaches (represented by binary encoding). Hyperparameter tuning is necessary for these improvements. On health care prediction tasks, gradient-boosted tree algorithms outperform standard statistical methods (linear or logistic regression). Gradient-boosted trees benefit more from vectorization. Additionally, the performance of the vectorization is more robust against incomplete data, but at least 1 million patients are needed to train the vectorization model. Furthermore, our cohort analysis shows that most patients’ diagnosis profiles lie on a spectrum of morbidity and cannot be easily mapped to distinct patient clusters. Overall, the results suggest we achieved the intended compression of the complete patient profiles while keeping the relevant amount of available information for prediction tasks.</p>
      </sec>
      <sec>
        <title>Comparison With Previous Research</title>
        <p>Embeddings of diagnosis codes have been studied extensively before [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref38">38</xref>-<xref ref-type="bibr" rid="ref42">42</xref>]. Patient-level embeddings have been derived rarely [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref42">42</xref>]. To the best of our knowledge, there is no ICD-10–based patient vectorization model trained and optimized for application in generalized health care tasks.</p>
        <p>Choi et al [<xref ref-type="bibr" rid="ref39">39</xref>] trained ICD-9 code representations using another similar NLP approach, and at the same time they learned “visit representations” (vectors) based on a binary encoding of the diagnosis codes for individual visits. Using logistic regression and these representations of visits, they were able to predict future disease codes from 1 visit to the next and clinical risk groups [<xref ref-type="bibr" rid="ref27">27</xref>]. In a similar way, Pham et al [<xref ref-type="bibr" rid="ref41">41</xref>] trained diagnosis code representations and combined them into variable-size “admission representations” as input for a long short-term memory (LSTM) to predict individual health prognoses after a health care intervention.</p>
        <p>Miotto et al [<xref ref-type="bibr" rid="ref25">25</xref>] derived a patient-level embedding (Deep Patient) using autoencoders based on ICD-9 diagnosis codes in conjunction with medications, procedures, laboratory tests, clinical notes (free-text), and demographic variables. They used random forests and patient embeddings to predict future diseases, but they did not tune their embedding algorithm or prepare it for more general tasks.</p>
        <p>Nguyen et al [<xref ref-type="bibr" rid="ref42">42</xref>] found diagnosis code embeddings using Word2Vec. Subsequently, given an outcome, they trained a convolutional neural network to find predictive motifs for a classifier. They arrived at a patient-level embedding after the convolutional neural network step, but these embeddings are dependent on the classification task (they predicted unplanned readmissions in a hospital setting).</p>
        <p>Almog et al [<xref ref-type="bibr" rid="ref14">14</xref>] applied a similar approach (Crystal Bone) to the special problem of predicting bone fracture incidents. For the prediction of this specific task, they trained their vectorization models on data filtered for bone incidents. They described 2 approaches: gradient-boosted trees (using XGBoost [<xref ref-type="bibr" rid="ref67">67</xref>]) on patients’ vector embeddings as well as an LSTM [<xref ref-type="bibr" rid="ref68">68</xref>] neural network on the individual sequences of patients’ diagnosis code embeddings. They observed better performance with the LSTM approach.</p>
        <p>Li et al [<xref ref-type="bibr" rid="ref29">29</xref>] derived an embedding for disease codes and a framework to predict diseases and even generalized outcomes (BEHRT). They did not set up a patient-level embedding with a fixed size, and their embedding framework needs to be retrained for new prediction tasks.</p>
        <p>We were more interested in a general compression and embedding of patients themselves for general health care–related tasks (such as the prediction of different outcomes and an overall visualization) and not just the optimization of 1 prediction task; thus, we trained on the data of all patients, not filtered for specific diagnoses, and restricted ourselves to the analysis of the patients’ vector embeddings. In addition, our embedding is based solely on the ICD-10 diagnosis data and does not need additional data sources that might not be readily available in a claims data setting. It would be helpful to look into how well other advanced machine learning algorithms such as LSTM or convolutional neural networks work on the ICD or patient vector embeddings for health care prediction tasks, but this is outside the scope of this paper.</p>
        <p>Adkins [<xref ref-type="bibr" rid="ref69">69</xref>] discussed the implications of a widespread adoption of machine learning on EHR data in clinical prediction contexts. While arguing that more complex machine learning models (such as the one presented in this work, combining vectorization and ensemble trees) on growing bodies of data will yield more precise predictions at the price of interpretability (as well as unforeseen ethical and legal issues), they pointed out the limitations of considering a limited number of ICD codes, a problem that we could address to a large extent in our work. Interpreting the dimensions of the vectorizations and other steps to “explainable machine learning/artificial intelligence” are still ongoing (eg, building on the Shapley additive explanations values for tree methods [<xref ref-type="bibr" rid="ref70">70</xref>,<xref ref-type="bibr" rid="ref71">71</xref>]). Here, we employed a simple approach using correlations between vector embeddings and binary encoding to allow interpretation of vector dimensions with regard to specific ICD-10 codes.</p>
      </sec>
      <sec>
        <title>Limitations and Strengths</title>
        <p>It has been discussed that a fusion of EHR data (clinical/diagnosis data and laboratory quantitative measurements) and other data sources (eg, medical images and laboratory measurements) would lead to further advancements in health care prediction tasks [<xref ref-type="bibr" rid="ref72">72</xref>,<xref ref-type="bibr" rid="ref73">73</xref>], where the problems of these mixed data types need to be properly addressed. Unfortunately, the claims data of the presented analysis do not contain these additional data sources, and thus the current implementation cannot take them into account.</p>
        <p>We set up access to a pretrained model of our vectorization with 10 dimensions so that other researchers in the field can evaluate our methods and use the model on their own health care data [<xref ref-type="bibr" rid="ref74">74</xref>].</p>
      </sec>
      <sec>
        <title>Future Research</title>
        <p>The next step will be to use the provided vectorization for relevant tasks to improve health care. We will investigate whether our approach will benefit tasks such as disease prediction with a long genesis time and prevention in cases of early detection, such as dementia and mild cognitive impairment. Furthermore, we will compare the benefits of data-driven vectorization with common EHR-based procedures such as the Elixhauser score [<xref ref-type="bibr" rid="ref18">18</xref>] or clinical risk groups [<xref ref-type="bibr" rid="ref27">27</xref>] in terms of describing patient cohorts or predicting health care outcomes. We think that patient clustering based on robust vectorization has the potential to identify patients who would benefit from early screening, which would lead to more personalized screening measures.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>Health care–related prediction tasks that rely on large samples of data should make use of vectorization instead of binary encoding. Our fully pretrained and validated model can be used on new and possibly small data sets as well. Advanced machine learning techniques profit more from our vectorization. We enable more precise prediction models for decisions on future public health policies as well as more accurate health care services for individual patients.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Information on previous studies, top M diagnoses, hyperparameter importance, performance comparisons, and vector loadings.</p>
        <media xlink:href="ai_v2i1e40755_app1.pdf" xlink:title="PDF File  (Adobe PDF File), 342 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Extended Table 2 of main manuscript.</p>
        <media xlink:href="ai_v2i1e40755_app2.pdf" xlink:title="PDF File  (Adobe PDF File), 102 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">CPM</term>
          <def>
            <p>Cumming prediction measure</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">DBOW</term>
          <def>
            <p>distributed bag of words</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">DM</term>
          <def>
            <p>distributed memory</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">GDPR</term>
          <def>
            <p>General Data Protection Regulation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">HDBSCAN</term>
          <def>
            <p>hierarchical density–based spatial clustering of applications with noise</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">ICD-10</term>
          <def>
            <p>International Statistical Classification of Diseases and Related Health Problems, 10th Revision</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">ICD-10-GM</term>
          <def>
            <p>International Statistical Classification of Diseases and Related Health Problems, 10th Revision, German Modification</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">LightGBM</term>
          <def>
            <p>light gradient-boosted machine</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">LSTM</term>
          <def>
            <p>long short-term memory</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">SHI</term>
          <def>
            <p>statutory health insurance</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">UMAP</term>
          <def>
            <p>uniform manifold approximation and projection</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This work is funded and contracted by the Associations of Statutory Health Insurance Physicians in the German Federal States.</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The data sets that formed the training data during this study are not publicly available due to the regulations for sensitive health data in Article 9 of the General Data Protection Regulation (GDPR) of the European Union. Access can be given by official boards within the context of specific research projects, and the authors are available to discuss such possibilities. An embedding model that was made as part of this study is available online so that other researchers in the field can evaluate our procedures and apply the model to their own health care data [<xref ref-type="bibr" rid="ref74">74</xref>].</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="conflict">
        <p>This work and the Central Research Institute of Ambulatory Health Care in Germany (Zi) are funded and contracted by the Associations of Statutory Health Insurance Physicians in the German Federal States. It is its task to support and further develop the health care assurance mandate under German law.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Häyrinen</surname>
              <given-names>Kristiina</given-names>
            </name>
            <name name-style="western">
              <surname>Saranto</surname>
              <given-names>Kaija</given-names>
            </name>
            <name name-style="western">
              <surname>Nykänen</surname>
              <given-names>Pirkko</given-names>
            </name>
          </person-group>
          <article-title>Definition, structure, content, use and impacts of electronic health records: a review of the research literature</article-title>
          <source>Int J Med Inform</source>
          <year>2008</year>
          <month>05</month>
          <volume>77</volume>
          <issue>5</issue>
          <fpage>291</fpage>
          <lpage>304</lpage>
          <pub-id pub-id-type="doi">10.1016/j.ijmedinf.2007.09.001</pub-id>
          <pub-id pub-id-type="medline">17951106</pub-id>
          <pub-id pub-id-type="pii">S1386-5056(07)00168-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dean</surname>
              <given-names>Bonnie B</given-names>
            </name>
            <name name-style="western">
              <surname>Lam</surname>
              <given-names>Jessica</given-names>
            </name>
            <name name-style="western">
              <surname>Natoli</surname>
              <given-names>Jaim L</given-names>
            </name>
            <name name-style="western">
              <surname>Butler</surname>
              <given-names>Qiana</given-names>
            </name>
            <name name-style="western">
              <surname>Aguilar</surname>
              <given-names>Daniel</given-names>
            </name>
            <name name-style="western">
              <surname>Nordyke</surname>
              <given-names>Robert J</given-names>
            </name>
          </person-group>
          <article-title>Review: use of electronic medical records for health outcomes research: a literature review</article-title>
          <source>Med Care Res Rev</source>
          <year>2009</year>
          <month>12</month>
          <volume>66</volume>
          <issue>6</issue>
          <fpage>611</fpage>
          <lpage>638</lpage>
          <pub-id pub-id-type="doi">10.1177/1077558709332440</pub-id>
          <pub-id pub-id-type="medline">19279318</pub-id>
          <pub-id pub-id-type="pii">1077558709332440</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jensen</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Jensen</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Brunak</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Mining electronic health records: towards better research applications and clinical care</article-title>
          <source>Nat Rev Genet</source>
          <year>2012</year>
          <month>05</month>
          <day>02</day>
          <volume>13</volume>
          <issue>6</issue>
          <fpage>395</fpage>
          <lpage>405</lpage>
          <pub-id pub-id-type="doi">10.1038/nrg3208</pub-id>
          <pub-id pub-id-type="medline">22549152</pub-id>
          <pub-id pub-id-type="pii">nrg3208</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Casey</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Schwartz</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Adler</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Using Electronic Health Records for Population Health Research: A Review of Methods and Applications</article-title>
          <source>Annu Rev Public Health</source>
          <year>2016</year>
          <volume>37</volume>
          <fpage>61</fpage>
          <lpage>81</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/26667605"/>
          </comment>
          <pub-id pub-id-type="doi">10.1146/annurev-publhealth-032315-021353</pub-id>
          <pub-id pub-id-type="medline">26667605</pub-id>
          <pub-id pub-id-type="pmcid">PMC6724703</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <collab>World Health Organization</collab>
          </person-group>
          <source>International Statistical Classification of Diseases and Related Health Problems, 10th Revision, Fifth Edition, 2016</source>
          <year>2015</year>
          <publisher-loc>Geneva, Switzerland</publisher-loc>
          <publisher-name>World Health Organization</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Weiss</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Page</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Peissig</surname>
              <given-names>Peggy L</given-names>
            </name>
            <name name-style="western">
              <surname>Natarajan</surname>
              <given-names>Sriraam</given-names>
            </name>
            <name name-style="western">
              <surname>McCarty</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Statistical Relational Learning to Predict Primary Myocardial Infarction from Electronic Health Records</article-title>
          <source>Proc Innov Appl Artif Intell Conf</source>
          <year>2012</year>
          <volume>2012</volume>
          <fpage>2341</fpage>
          <lpage>2347</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/25360347"/>
          </comment>
          <pub-id pub-id-type="medline">25360347</pub-id>
          <pub-id pub-id-type="pmcid">PMC4211289</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Risk Prediction with Electronic Health Records: A Deep Learning Approach</article-title>
          <year>2016</year>
          <conf-name>SIAM International Conference on Data Mining (SDM)</conf-name>
          <conf-date>May 5-7, 2016</conf-date>
          <conf-loc>Miami, FL</conf-loc>
          <fpage>432</fpage>
          <lpage>440</lpage>
          <pub-id pub-id-type="doi">10.1137/1.9781611974348.49</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Bahadori</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kulas</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Schuetz</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>RETAIN: An interpretable predictive model for healthcare using REverse time AttentIoN mechanism</article-title>
          <year>2016</year>
          <conf-name>30th International Conference on Neural Information Processing Systems</conf-name>
          <conf-date>December 5-10, 2016</conf-date>
          <conf-loc>Barcelona, Spain</conf-loc>
          <fpage>3512</fpage>
          <lpage>3520</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/10.5555/3157382.3157490"/>
          </comment>
          <pub-id pub-id-type="doi">10.5555/3157382.3157490</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chakrabortty</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Liao</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ananthakrishnan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gainer</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Churchill</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Szolovits</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kohane</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Surrogate-assisted feature extraction for high-throughput phenotyping</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2017</year>
          <month>04</month>
          <day>01</day>
          <volume>24</volume>
          <issue>e1</issue>
          <fpage>e143</fpage>
          <lpage>e149</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/27632993"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocw135</pub-id>
          <pub-id pub-id-type="medline">27632993</pub-id>
          <pub-id pub-id-type="pii">ocw135</pub-id>
          <pub-id pub-id-type="pmcid">PMC6080726</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rubin</surname>
              <given-names>KH</given-names>
            </name>
            <name name-style="western">
              <surname>Möller</surname>
              <given-names>Sören</given-names>
            </name>
            <name name-style="western">
              <surname>Holmberg</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Bliddal</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Søndergaard</surname>
              <given-names>Jens</given-names>
            </name>
            <name name-style="western">
              <surname>Abrahamsen</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>A New Fracture Risk Assessment Tool (FREM) Based on Public Health Registries</article-title>
          <source>J Bone Miner Res</source>
          <year>2018</year>
          <month>11</month>
          <volume>33</volume>
          <issue>11</issue>
          <fpage>1967</fpage>
          <lpage>1979</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://onlinelibrary.wiley.com/doi/10.1002/jbmr.3528"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/jbmr.3528</pub-id>
          <pub-id pub-id-type="medline">29924428</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Steele</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Denaxas</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>AD</given-names>
            </name>
            <name name-style="western">
              <surname>Hemingway</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Luscombe</surname>
              <given-names>NM</given-names>
            </name>
          </person-group>
          <article-title>Machine learning models in electronic health records can outperform conventional survival models for predicting patient mortality in coronary artery disease</article-title>
          <source>PLoS One</source>
          <year>2018</year>
          <volume>13</volume>
          <issue>8</issue>
          <fpage>e0202344</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0202344"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0202344</pub-id>
          <pub-id pub-id-type="medline">30169498</pub-id>
          <pub-id pub-id-type="pii">PONE-D-18-03097</pub-id>
          <pub-id pub-id-type="pmcid">PMC6118376</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jorge</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Castro</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Barnado</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gainer</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Hong</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Carroll</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Denny</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Crofford</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Costenbader</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Liao</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Karlson</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Feldman</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Identifying lupus patients in electronic health records: Development and validation of machine learning algorithms and application of rule-based algorithms</article-title>
          <source>Semin Arthritis Rheum</source>
          <year>2019</year>
          <month>08</month>
          <volume>49</volume>
          <issue>1</issue>
          <fpage>84</fpage>
          <lpage>90</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/30665626"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.semarthrit.2019.01.002</pub-id>
          <pub-id pub-id-type="medline">30665626</pub-id>
          <pub-id pub-id-type="pii">S0049-0172(18)30651-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC6609504</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tomašev</surname>
              <given-names>Nenad</given-names>
            </name>
            <name name-style="western">
              <surname>Glorot</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Rae</surname>
              <given-names>JW</given-names>
            </name>
            <name name-style="western">
              <surname>Zielinski</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Askham</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Saraiva</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mottram</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Meyer</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ravuri</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Protsyuk</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Connell</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hughes</surname>
              <given-names>CO</given-names>
            </name>
            <name name-style="western">
              <surname>Karthikesalingam</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cornebise</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Montgomery</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Rees</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Laing</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Baker</surname>
              <given-names>CR</given-names>
            </name>
            <name name-style="western">
              <surname>Peterson</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Reeves</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Hassabis</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>King</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Suleyman</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Back</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Nielson</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ledsam</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Mohamed</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>A clinically applicable approach to continuous prediction of future acute kidney injury</article-title>
          <source>Nature</source>
          <year>2019</year>
          <month>08</month>
          <volume>572</volume>
          <issue>7767</issue>
          <fpage>116</fpage>
          <lpage>119</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/31367026"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41586-019-1390-1</pub-id>
          <pub-id pub-id-type="medline">31367026</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41586-019-1390-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC6722431</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Almog</surname>
              <given-names>YA</given-names>
            </name>
            <name name-style="western">
              <surname>Rai</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Moulaison</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Powell</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Mishra</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Weinberg</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Hamilton</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Oates</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>McCloskey</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Cummings</surname>
              <given-names>SR</given-names>
            </name>
          </person-group>
          <article-title>Deep Learning With Electronic Health Records for Short-Term Fracture Risk Identification: Crystal Bone Algorithm Development and Validation</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>10</month>
          <day>16</day>
          <volume>22</volume>
          <issue>10</issue>
          <fpage>e22550</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/10/e22550/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/22550</pub-id>
          <pub-id pub-id-type="medline">32956069</pub-id>
          <pub-id pub-id-type="pii">v22i10e22550</pub-id>
          <pub-id pub-id-type="pmcid">PMC7600029</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kogan</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Twyman</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Heap</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Milentijevic</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Alberts</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Assessing stroke severity using electronic health record data: a machine learning approach</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2020</year>
          <month>01</month>
          <day>08</day>
          <volume>20</volume>
          <issue>1</issue>
          <fpage>8</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-019-1010-x"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-019-1010-x</pub-id>
          <pub-id pub-id-type="medline">31914991</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-019-1010-x</pub-id>
          <pub-id pub-id-type="pmcid">PMC6950922</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Martinez</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Levin</surname>
              <given-names>SR</given-names>
            </name>
            <name name-style="western">
              <surname>Klein</surname>
              <given-names>EY</given-names>
            </name>
            <name name-style="western">
              <surname>Parikh</surname>
              <given-names>CR</given-names>
            </name>
            <name name-style="western">
              <surname>Menez</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Taylor</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Hinson</surname>
              <given-names>JS</given-names>
            </name>
          </person-group>
          <article-title>Early Prediction of Acute Kidney Injury in the Emergency Department With Machine-Learning Methods Applied to Electronic Health Record Data</article-title>
          <source>Ann Emerg Med</source>
          <year>2020</year>
          <month>10</month>
          <volume>76</volume>
          <issue>4</issue>
          <fpage>501</fpage>
          <lpage>514</lpage>
          <pub-id pub-id-type="doi">10.1016/j.annemergmed.2020.05.026</pub-id>
          <pub-id pub-id-type="medline">32713624</pub-id>
          <pub-id pub-id-type="pii">S0196-0644(20)30397-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Su</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Aseltine</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Doshi</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Rogers</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Machine learning for suicide risk prediction in children and adolescents with electronic health records</article-title>
          <source>Transl Psychiatry</source>
          <year>2020</year>
          <month>11</month>
          <day>26</day>
          <volume>10</volume>
          <issue>1</issue>
          <fpage>413</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41398-020-01100-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41398-020-01100-0</pub-id>
          <pub-id pub-id-type="medline">33243979</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41398-020-01100-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC7693189</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Elixhauser</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Steiner</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Harris</surname>
              <given-names>DR</given-names>
            </name>
            <name name-style="western">
              <surname>Coffey</surname>
              <given-names>RM</given-names>
            </name>
          </person-group>
          <article-title>Comorbidity measures for use with administrative data</article-title>
          <source>Med Care</source>
          <year>1998</year>
          <month>01</month>
          <volume>36</volume>
          <issue>1</issue>
          <fpage>8</fpage>
          <lpage>27</lpage>
          <pub-id pub-id-type="doi">10.1097/00005650-199801000-00004</pub-id>
          <pub-id pub-id-type="medline">9431328</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Moore</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>White</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Washington</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Coenen</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Elixhauser</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Identifying Increased Risk of Readmission and In-hospital Mortality Using Hospital Administrative Data: The AHRQ Elixhauser Comorbidity Index</article-title>
          <source>Med Care</source>
          <year>2017</year>
          <month>07</month>
          <volume>55</volume>
          <issue>7</issue>
          <fpage>698</fpage>
          <lpage>705</lpage>
          <pub-id pub-id-type="doi">10.1097/MLR.0000000000000735</pub-id>
          <pub-id pub-id-type="medline">28498196</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Corey</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Kashyap</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lorenzi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Lagoo-Deenadayalan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Heller</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Whalen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Balu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Heflin</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>McDonald</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Swaminathan</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sendak</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Development and validation of machine learning models to identify high-risk surgical patients using automatically curated electronic health record data (Pythia): A retrospective, single-site study</article-title>
          <source>PLoS Med</source>
          <year>2018</year>
          <month>11</month>
          <volume>15</volume>
          <issue>11</issue>
          <fpage>e1002701</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pmed.1002701"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pmed.1002701</pub-id>
          <pub-id pub-id-type="medline">30481172</pub-id>
          <pub-id pub-id-type="pii">PMEDICINE-D-18-01986</pub-id>
          <pub-id pub-id-type="pmcid">PMC6258507</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rahimian</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Salimi-Khorshidi</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Payberah</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Tran</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ayala Solares</surname>
              <given-names>Roberto</given-names>
            </name>
            <name name-style="western">
              <surname>Raimondi</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Nazarzadeh</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Canoy</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Rahimi</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Predicting the risk of emergency admission with machine learning: Development and validation using linked electronic health records</article-title>
          <source>PLoS Med</source>
          <year>2018</year>
          <month>11</month>
          <volume>15</volume>
          <issue>11</issue>
          <fpage>e1002695</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pmed.1002695"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pmed.1002695</pub-id>
          <pub-id pub-id-type="medline">30458006</pub-id>
          <pub-id pub-id-type="pii">PMEDICINE-D-18-01426</pub-id>
          <pub-id pub-id-type="pmcid">PMC6245681</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hippisley-Cox</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Coupland</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Predicting risk of emergency admission to hospital using primary care data: derivation and validation of QAdmissions score</article-title>
          <source>BMJ Open</source>
          <year>2013</year>
          <month>08</month>
          <day>19</day>
          <volume>3</volume>
          <issue>8</issue>
          <fpage>e003482</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmjopen.bmj.com/lookup/pmidlookup?view=long&#38;pmid=23959760"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmjopen-2013-003482</pub-id>
          <pub-id pub-id-type="medline">23959760</pub-id>
          <pub-id pub-id-type="pii">bmjopen-2013-003482</pub-id>
          <pub-id pub-id-type="pmcid">PMC3753502</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Agresti</surname>
              <given-names>Alan</given-names>
            </name>
          </person-group>
          <source>Categorical Data Analysis, 3rd Edition</source>
          <year>2013</year>
          <publisher-loc>Hoboken, NJ</publisher-loc>
          <publisher-name>John Wiley &#38; Sons</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Bahadori</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Schuetz</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Doctor AI: Predicting Clinical Events via Recurrent Neural Networks</article-title>
          <source>JMLR Workshop Conf Proc</source>
          <year>2016</year>
          <month>08</month>
          <volume>56</volume>
          <fpage>301</fpage>
          <lpage>318</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/28286600"/>
          </comment>
          <pub-id pub-id-type="medline">28286600</pub-id>
          <pub-id pub-id-type="pmcid">PMC5341604</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Miotto</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Kidd</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Dudley</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Deep Patient: An Unsupervised Representation to Predict the Future of Patients from the Electronic Health Records</article-title>
          <source>Sci Rep</source>
          <year>2016</year>
          <month>05</month>
          <day>17</day>
          <volume>6</volume>
          <fpage>26094</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/srep26094"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/srep26094</pub-id>
          <pub-id pub-id-type="medline">27185194</pub-id>
          <pub-id pub-id-type="pii">srep26094</pub-id>
          <pub-id pub-id-type="pmcid">PMC4869115</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bian</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Xiong</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Early Detection of Disease Using Electronic Health Records and Fisher’s Wishart Discriminant Analysis</article-title>
          <source>Procedia Computer Science</source>
          <year>2018</year>
          <volume>140</volume>
          <fpage>393</fpage>
          <lpage>402</lpage>
          <pub-id pub-id-type="doi">10.1016/j.procs.2018.10.299</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hughes</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Averill</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Eisenhandler</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Goldfield</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Muldoon</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Neff</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gay</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Clinical Risk Groups (CRGs): a classification system for risk-adjusted capitation-based payment and health care management</article-title>
          <source>Med Care</source>
          <year>2004</year>
          <month>01</month>
          <volume>42</volume>
          <issue>1</issue>
          <fpage>81</fpage>
          <lpage>90</lpage>
          <pub-id pub-id-type="doi">10.1097/01.mlr.0000102367.93252.70</pub-id>
          <pub-id pub-id-type="medline">14713742</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ayala Solares</surname>
              <given-names>Jose Roberto</given-names>
            </name>
            <name name-style="western">
              <surname>Diletta Raimondi</surname>
              <given-names>Francesca Elisa</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Rahimian</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Canoy</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Tran</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Pinho Gomes</surname>
              <given-names>Ana Catarina</given-names>
            </name>
            <name name-style="western">
              <surname>Payberah</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zottoli</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Nazarzadeh</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Conrad</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Rahimi</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Salimi-Khorshidi</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Deep learning for electronic health records: A comparative review of multiple deep neural architectures</article-title>
          <source>J Biomed Inform</source>
          <year>2020</year>
          <month>01</month>
          <volume>101</volume>
          <fpage>103337</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(19)30256-4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2019.103337</pub-id>
          <pub-id pub-id-type="medline">31916973</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(19)30256-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Rao</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Solares</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hassaine</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ramakrishnan</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Canoy</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Rahimi</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Salimi-Khorshidi</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>BEHRT: Transformer for Electronic Health Records</article-title>
          <source>Sci Rep</source>
          <year>2020</year>
          <month>04</month>
          <day>28</day>
          <volume>10</volume>
          <issue>1</issue>
          <fpage>7155</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-020-62922-y"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-020-62922-y</pub-id>
          <pub-id pub-id-type="medline">32346050</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-020-62922-y</pub-id>
          <pub-id pub-id-type="pmcid">PMC7189231</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Goodfellow</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Bengio</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Courville</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <source>Deep Learning</source>
          <year>2016</year>
          <publisher-loc>Cambridge, MA</publisher-loc>
          <publisher-name>MIT Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="web">
          <source>ICD-10-GM Version 2020, Systematisches Verzeichnis, Internationale statistische Klassifikation der Krankheiten und verwandter Gesundheitsprobleme, 10. Revision, Stand: 20. September 2019</source>
          <year>2019</year>
          <access-date>2023-03-28</access-date>
          <publisher-loc>Köln, Germany</publisher-loc>
          <publisher-name>Deutsches Institut für Medizinische Dokumentation und Information (DIMDI) im Auftrag des Bundesministeriums für Gesundheit (BMG) unter Beteiligung der Arbeitsgruppe ICD des Kuratoriums für Fragen der Klassifikation im Gesundheitswesen (KKG)</publisher-name>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.bfarm.de/EN/Code-systems/Classifications/ICD/ICD-10-GM/_node.html">https://www.bfarm.de/EN/Code-systems/Classifications/ICD/ICD-10-GM/_node.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Le</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Mikolov</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Distributed representations of sentences and documents</article-title>
          <year>2014</year>
          <month>06</month>
          <conf-name>31st International Conference on Machine Learning</conf-name>
          <conf-date>June 22-24, 2014</conf-date>
          <conf-loc>Beijing, China</conf-loc>
          <fpage>1188</fpage>
          <lpage>1196</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/10.5555/3044805.3045025"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mikolov</surname>
              <given-names>Tomas</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Kai</given-names>
            </name>
            <name name-style="western">
              <surname>Corrado</surname>
              <given-names>Greg</given-names>
            </name>
            <name name-style="western">
              <surname>Dean</surname>
              <given-names>Jeffrey</given-names>
            </name>
          </person-group>
          <article-title>Efficient Estimation of Word Representations in Vector Space</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on September 7, 2013
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1301.3781"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.1301.3781</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mikolov</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Sutskever</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Corrado</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Dean</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Distributed representations of words and phrases and their compositionality</article-title>
          <year>2013</year>
          <conf-name>27th Conference on Neural Information Processing Systems</conf-name>
          <conf-date>December 5-10, 2013</conf-date>
          <conf-loc>Stateline, NV</conf-loc>
          <fpage>3111</fpage>
          <lpage>3119</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1310.4546"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Caselles-Dupré</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lesaint</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Royo-Letelier</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Word2vec applied to recommendation: Hyperparameters matter</article-title>
          <year>2018</year>
          <conf-name>12th ACM Conference on Recommender Systems</conf-name>
          <conf-date>October 2-7, 2018</conf-date>
          <conf-loc>Vancouver, BC, Canada</conf-loc>
          <publisher-loc>New York, NY</publisher-loc>
          <publisher-name>ACM</publisher-name>
          <fpage>352</fpage>
          <lpage>356</lpage>
          <pub-id pub-id-type="doi">10.1145/3240323.3240377</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Du</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jia</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tao</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zhi</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Gene2vec: distributed representation of genes based on co-expression</article-title>
          <source>BMC Genomics</source>
          <year>2019</year>
          <month>02</month>
          <day>04</day>
          <volume>20</volume>
          <issue>Suppl 1</issue>
          <fpage>82</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcgenomics.biomedcentral.com/articles/10.1186/s12864-018-5370-x"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12864-018-5370-x</pub-id>
          <pub-id pub-id-type="medline">30712510</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12864-018-5370-x</pub-id>
          <pub-id pub-id-type="pmcid">PMC6360648</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chamberlain</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Rossi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Shiebler</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Sedhain</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bronstein</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Tuning Word2vec for large scale recommendation system</article-title>
          <year>2020</year>
          <conf-name>14th ACM Conference on Recommender Systems</conf-name>
          <conf-date>September 22-26, 2020</conf-date>
          <conf-loc>Virtual</conf-loc>
          <publisher-loc>New York, NY</publisher-loc>
          <publisher-name>Association for Computing Machinery</publisher-name>
          <pub-id pub-id-type="doi">10.1145/3383313.3418486</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tran</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>TD</given-names>
            </name>
            <name name-style="western">
              <surname>Phung</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Venkatesh</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Learning vector representation of medical objects via EMR-driven nonnegative restricted Boltzmann machines (eNRBM)</article-title>
          <source>J Biomed Inform</source>
          <year>2015</year>
          <month>04</month>
          <volume>54</volume>
          <fpage>96</fpage>
          <lpage>105</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(15)00014-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2015.01.012</pub-id>
          <pub-id pub-id-type="medline">25661261</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(15)00014-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Bahadori</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Searles</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Coffey</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Thompson</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bost</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tejedor-Sojo</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Multi-layer representation learning for medical concepts</article-title>
          <year>2016</year>
          <conf-name>22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</conf-name>
          <conf-date>August 13-17, 2016</conf-date>
          <conf-loc>San Francisco, CA</conf-loc>
          <publisher-loc>New York, NY</publisher-loc>
          <publisher-name>Association for Computing Machinery</publisher-name>
          <fpage>1495</fpage>
          <lpage>1504</lpage>
          <pub-id pub-id-type="doi">10.1145/2939672.2939823</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Chiu</surname>
              <given-names>CY-I</given-names>
            </name>
            <name name-style="western">
              <surname>Sontag</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Learning Low-Dimensional Representations of Medical Concepts</article-title>
          <source>AMIA Jt Summits Transl Sci Proc</source>
          <year>2016</year>
          <volume>2016</volume>
          <fpage>41</fpage>
          <lpage>50</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/27570647"/>
          </comment>
          <pub-id pub-id-type="medline">27570647</pub-id>
          <pub-id pub-id-type="pmcid">PMC5001761</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pham</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Tran</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Phung</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Venkatesh</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Bailey</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Khan</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Washio</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Dobbie</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>JZ</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>DeepCare: a deep dynamic memory model for predictive medicine</article-title>
          <source>Advances in Knowledge Discovery and Data Mining: 20th Pacific-Asia Conference, PAKDD 2016, Auckland, New Zealand, April 19–22, 2016, Proceedings, Part II</source>
          <year>2016</year>
          <publisher-loc>Cham, Switzerland</publisher-loc>
          <publisher-name>Springer</publisher-name>
          <fpage>30</fpage>
          <lpage>41</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Tran</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Wickramasinghe</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Venkatesh</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>A Convolutional Net for Medical Records</article-title>
          <source>IEEE J Biomed Health Inform</source>
          <year>2017</year>
          <month>01</month>
          <volume>21</volume>
          <issue>1</issue>
          <fpage>22</fpage>
          <lpage>30</lpage>
          <pub-id pub-id-type="doi">10.1109/JBHI.2016.2633963</pub-id>
          <pub-id pub-id-type="medline">27913366</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>DESTATIS</collab>
          </person-group>
          <article-title>Mitglieder und mitversicherte Familienangehörige der gesetzlichen Krankenversicherung am 1.7. eines Jahres (Anzahl)</article-title>
          <source>Gesundheitsberichterstattung des Bundes</source>
          <year>2022</year>
          <access-date>2023-03-28</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.gbe-bund.de/gbe/">https://www.gbe-bund.de/gbe/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Frahm</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Peters</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bätzing</surname>
              <given-names>Jörg</given-names>
            </name>
            <name name-style="western">
              <surname>Ellenberger</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Akmatov</surname>
              <given-names>MK</given-names>
            </name>
            <name name-style="western">
              <surname>Haas</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Rommer</surname>
              <given-names>PS</given-names>
            </name>
            <name name-style="western">
              <surname>Stahmann</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zettl</surname>
              <given-names>UK</given-names>
            </name>
            <name name-style="western">
              <surname>Holstiege</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Treatment patterns in pediatric patients with multiple sclerosis in Germany—a nationwide claim-based analysis</article-title>
          <source>Ther Adv Neurol Disord</source>
          <year>2021</year>
          <volume>14</volume>
          <fpage>17562864211048336</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://journals.sagepub.com/doi/abs/10.1177/17562864211048336?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/17562864211048336</pub-id>
          <pub-id pub-id-type="medline">34646362</pub-id>
          <pub-id pub-id-type="pii">10.1177_17562864211048336</pub-id>
          <pub-id pub-id-type="pmcid">PMC8504210</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tikkanen</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Osborn</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Mossialos</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Djordjevic</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wharton</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>International profiles of health care systems</article-title>
          <source>The Commonwealth Fund</source>
          <year>2020</year>
          <access-date>2023-03-31</access-date>
          <publisher-loc>London, UK</publisher-loc>
          <publisher-name>The Commonwealth Fund</publisher-name>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.commonwealthfund.org/international-health-policy-center/system-profiles">https://www.commonwealthfund.org/international-health-policy-center/system-profiles</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Blümel</surname>
              <given-names>Miriam</given-names>
            </name>
            <name name-style="western">
              <surname>Spranger</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Achstetter</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Maresso</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Busse</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Germany: Health System Review</article-title>
          <source>Health Syst Transit</source>
          <year>2020</year>
          <month>12</month>
          <volume>22</volume>
          <issue>6</issue>
          <fpage>1</fpage>
          <lpage>272</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://apps.who.int/iris/bitstream/handle/10665/341674/HiT-22-6-2020-eng.pdf"/>
          </comment>
          <pub-id pub-id-type="medline">34232120</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Young</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Hazarika</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Poria</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cambria</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Recent Trends in Deep Learning Based Natural Language Processing [Review Article]</article-title>
          <source>IEEE Comput Intell Mag</source>
          <year>2018</year>
          <month>08</month>
          <volume>13</volume>
          <issue>3</issue>
          <fpage>55</fpage>
          <lpage>75</lpage>
          <pub-id pub-id-type="doi">10.1109/mci.2018.2840738</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Minaee</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kalchbrenner</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Cambria</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Nikzad</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Chenaghlu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Deep Learning–based Text Classification: A Comprehensive Review</article-title>
          <source>ACM Comput Surv</source>
          <year>2021</year>
          <month>04</month>
          <day>17</day>
          <volume>54</volume>
          <issue>3</issue>
          <fpage>1</fpage>
          <lpage>40</lpage>
          <pub-id pub-id-type="doi">10.1145/3439726</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bergstra</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bardenet</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Bengio</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kégl</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Algorithms for hyper-parameter optimization</article-title>
          <year>2011</year>
          <conf-name>24th International Conference on Neural Information Processing Systems</conf-name>
          <conf-date>December 12-17, 2011</conf-date>
          <conf-loc>Granada, Spain</conf-loc>
          <publisher-loc>Red Hook, NY</publisher-loc>
          <publisher-name>Curran Associates</publisher-name>
          <fpage>2546</fpage>
          <lpage>2554</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://proceedings.neurips.cc/paper/2011/file/86e8f7ab32cfd12577bc2619bc635690-Paper.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ke</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Meng</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Finley</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>LightGBM: A highly efficient gradient boosting decision tree</article-title>
          <year>2017</year>
          <conf-name>31st International Conference on Neural Information Processing Systems</conf-name>
          <conf-date>December 4-9, 2017</conf-date>
          <conf-loc>Long Beach, CA</conf-loc>
          <publisher-loc>Red Hook, NY</publisher-loc>
          <publisher-name>Curran Associates</publisher-name>
          <fpage>3149</fpage>
          <lpage>3157</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://proceedings.neurips.cc/paper/2017/file/6449f44a102fde848669bdd9eb6b76fa-Paper.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Greedy function approximation: A gradient boosting machine</article-title>
          <source>The Annals of Statistics</source>
          <year>2001</year>
          <volume>29</volume>
          <issue>5</issue>
          <fpage>1189</fpage>
          <lpage>1232</lpage>
          <pub-id pub-id-type="doi">10.1214/aos/1013203451</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hastie</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Tibshirani</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Boosting and additive trees</article-title>
          <source>The elements of statistical learning: Data mining, inference, and prediction</source>
          <year>2009</year>
          <publisher-loc>New York, NY</publisher-loc>
          <publisher-name>Springer</publisher-name>
          <fpage>337</fpage>
          <lpage>387</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Al Daoud</surname>
              <given-names>Essam</given-names>
            </name>
          </person-group>
          <article-title>Comparison between XGBoost, LightGBM and CatBoost using a home credit dataset</article-title>
          <source>International Journal of Information, Control and Computer Sciences</source>
          <year>2019</year>
          <volume>13</volume>
          <issue>1</issue>
          <fpage>6</fpage>
          <lpage>10</lpage>
          <pub-id pub-id-type="doi">10.5281/zenodo.3607805</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bentéjac</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Csörgő</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Martínez-Muñoz</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>A comparative analysis of gradient boosting algorithms</article-title>
          <source>Artif Intell Rev</source>
          <year>2020</year>
          <month>08</month>
          <day>24</day>
          <volume>54</volume>
          <issue>3</issue>
          <fpage>1937</fpage>
          <lpage>1967</lpage>
          <pub-id pub-id-type="doi">10.1007/s10462-020-09896-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cumming</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Knutson</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Cameron</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Derrick</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>A comparative analysis of claims-based methods of health risk assessment for commercial populations: Final report to the Society of Actuaries</article-title>
          <source>Society of Actuaries</source>
          <year>2002</year>
          <access-date>2023-03-31</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.soa.org/globalassets/assets/Files/Research/Projects/risk-assessmentc.pdf">https://www.soa.org/globalassets/assets/Files/Research/Projects/risk-assessmentc.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hastie</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Tibshirani</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Additive logistic regression: a statistical view of boosting (with discussion and a rejoinder by the authors)</article-title>
          <source>The Annals of Statistics</source>
          <year>2000</year>
          <volume>28</volume>
          <issue>2</issue>
          <fpage>337</fpage>
          <lpage>407</lpage>
          <pub-id pub-id-type="doi">10.1214/aos/1016218223</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McInnes</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Healy</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Melville</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>UMAP: Uniform manifold approximation and projection for dimension reduction</article-title>
          <source>Journal of Open Source Software</source>
          <year>2018</year>
          <volume>3</volume>
          <issue>29</issue>
          <fpage>861</fpage>
          <pub-id pub-id-type="doi">10.21105/joss.00861</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Campello</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Moulavi</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Sander</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Density-based clustering based on hierarchical density estimates</article-title>
          <year>2013</year>
          <conf-name>PAKDD 2013: Advances in Knowledge Discovery and Data Mining</conf-name>
          <conf-date>April 14-17, 2013</conf-date>
          <conf-loc>Gold Coast, QLD, Australia</conf-loc>
          <publisher-loc>Berlin/Heidelberg, Germany</publisher-loc>
          <publisher-name>Springer</publisher-name>
          <fpage>160</fpage>
          <lpage>172</lpage>
          <pub-id pub-id-type="doi">10.1007/978-3-642-37456-2_14</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Van Rossum</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Drake</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <source>Python 3 Reference Manual</source>
          <year>2009</year>
          <publisher-loc>Scotts Valley, CA</publisher-loc>
          <publisher-name>CreateSpace</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="web">
          <article-title>R: A Language and Environment for Statistical Computing</article-title>
          <source>The R Foundation</source>
          <year>2020</year>
          <access-date>2023-03-31</access-date>
          <publisher-loc>Vienna, Austria</publisher-loc>
          <publisher-name>R Foundation for Statistical Computing</publisher-name>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.R-project.org/">https://www.R-project.org/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Řehůřek</surname>
              <given-names>Radim</given-names>
            </name>
            <name name-style="western">
              <surname>Sojka</surname>
              <given-names>Petr</given-names>
            </name>
          </person-group>
          <article-title>Software Framework for Topic Modelling with Large Corpora</article-title>
          <year>2010</year>
          <conf-name>LREC 2010 Workshop New Challenges for NLP Frameworks</conf-name>
          <conf-date>May 22, 2010</conf-date>
          <conf-loc>Valletta, Malta</conf-loc>
          <fpage>46</fpage>
          <lpage>50</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref62">
        <label>62</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Akiba</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Sano</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Yanase</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ohta</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Koyama</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Optuna: A next-generation hyperparameter optimization framework</article-title>
          <year>2019</year>
          <conf-name>25th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</conf-name>
          <conf-date>August 4-8, 2019</conf-date>
          <conf-loc>Anchorage, AK</conf-loc>
          <publisher-loc>New York, NY</publisher-loc>
          <publisher-name>Association for Computing Machinery</publisher-name>
          <fpage>2623</fpage>
          <lpage>2631</lpage>
          <pub-id pub-id-type="doi">10.1145/3292500.3330701</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref63">
        <label>63</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pedregosa</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Varoquaux</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gramfort</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Michel</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Thirion</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Grisel</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Blondel</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Prettenhofer</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Weiss</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Dubourg</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Vanderplas</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Passos</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cournapeau</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Brucher</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Perrot</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Duchesnay</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Scikit-learn: Machine Learning in Python</article-title>
          <source>The Journal of Machine Learning Research</source>
          <year>2011</year>
          <month>11</month>
          <day>1</day>
          <volume>12</volume>
          <fpage>2825</fpage>
          <lpage>2830</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/10.5555/1953048.2078195"/>
          </comment>
          <pub-id pub-id-type="doi">10.5555/1953048.2078195</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref64">
        <label>64</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wickham</surname>
              <given-names>Hadley</given-names>
            </name>
          </person-group>
          <source>ggplot2: Elegant Graphics for Data Analysis</source>
          <year>2016</year>
          <publisher-loc>New York, NY</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref65">
        <label>65</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Starker</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Buttmann-Schweiger</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Krause</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Barnes</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Kraywinkel</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Holmberg</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>[Cancer screening in Germany: availability and participation]</article-title>
          <source>Bundesgesundheitsblatt Gesundheitsforschung Gesundheitsschutz</source>
          <year>2018</year>
          <month>12</month>
          <volume>61</volume>
          <issue>12</issue>
          <fpage>1491</fpage>
          <lpage>1499</lpage>
          <pub-id pub-id-type="doi">10.1007/s00103-018-2842-8</pub-id>
          <pub-id pub-id-type="medline">30406892</pub-id>
          <pub-id pub-id-type="pii">10.1007/s00103-018-2842-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref66">
        <label>66</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ash</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Ellis</surname>
              <given-names>RP</given-names>
            </name>
            <name name-style="western">
              <surname>Ayanian</surname>
              <given-names>JZ</given-names>
            </name>
            <name name-style="western">
              <surname>Pope</surname>
              <given-names>GC</given-names>
            </name>
            <name name-style="western">
              <surname>Bowen</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Weyuker</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Predicting pharmacy costs and other medical costs using diagnoses and drug claims</article-title>
          <source>Med Care</source>
          <year>2005</year>
          <month>01</month>
          <volume>43</volume>
          <issue>1</issue>
          <fpage>34</fpage>
          <lpage>43</lpage>
          <pub-id pub-id-type="medline">15626932</pub-id>
          <pub-id pub-id-type="pii">00005650-200501000-00006</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref67">
        <label>67</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Guestrin</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>XGBoost: A scalable tree boosting system</article-title>
          <year>2016</year>
          <conf-name>22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</conf-name>
          <conf-date>August 13-17, 2016</conf-date>
          <conf-loc>San Francisco, CA</conf-loc>
          <fpage>785</fpage>
          <lpage>794</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1603.02754"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/2939672.2939785</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref68">
        <label>68</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hochreiter</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidhuber</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Long short-term memory</article-title>
          <source>Neural Comput</source>
          <year>1997</year>
          <month>11</month>
          <day>15</day>
          <volume>9</volume>
          <issue>8</issue>
          <fpage>1735</fpage>
          <lpage>1780</lpage>
          <pub-id pub-id-type="doi">10.1162/neco.1997.9.8.1735</pub-id>
          <pub-id pub-id-type="medline">9377276</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref69">
        <label>69</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Adkins</surname>
              <given-names>DE</given-names>
            </name>
          </person-group>
          <article-title>Machine Learning and Electronic Health Records: A Paradigm Shift</article-title>
          <source>Am J Psychiatry</source>
          <year>2017</year>
          <month>02</month>
          <day>01</day>
          <volume>174</volume>
          <issue>2</issue>
          <fpage>93</fpage>
          <lpage>94</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/28142275"/>
          </comment>
          <pub-id pub-id-type="doi">10.1176/appi.ajp.2016.16101169</pub-id>
          <pub-id pub-id-type="medline">28142275</pub-id>
          <pub-id pub-id-type="pmcid">PMC5807064</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref70">
        <label>70</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lundberg</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>A unified approach to interpreting model predictions</article-title>
          <year>2017</year>
          <conf-name>31st Conference on Neural Information Processing Systems</conf-name>
          <conf-date>December 4-9, 2017</conf-date>
          <conf-loc>Long Beach, CA</conf-loc>
          <publisher-loc>Red Hook, NY</publisher-loc>
          <publisher-name>Curran Associates</publisher-name>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://proceedings.neurips.cc/paper/2017/file/8a20a8621978632d76c43dfd28b67767-Paper.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref71">
        <label>71</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lundberg</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Erion</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>DeGrave</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Prutkin</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Nair</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Katz</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Himmelfarb</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bansal</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>From Local Explanations to Global Understanding with Explainable AI for Trees</article-title>
          <source>Nat Mach Intell</source>
          <year>2020</year>
          <month>01</month>
          <volume>2</volume>
          <issue>1</issue>
          <fpage>56</fpage>
          <lpage>67</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32607472"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s42256-019-0138-9</pub-id>
          <pub-id pub-id-type="medline">32607472</pub-id>
          <pub-id pub-id-type="pmcid">PMC7326367</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref72">
        <label>72</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Pareek</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Seyyedi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Banerjee</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Lungren</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Fusion of medical imaging and electronic health records using deep learning: a systematic review and implementation guidelines</article-title>
          <source>NPJ Digit Med</source>
          <year>2020</year>
          <volume>3</volume>
          <fpage>136</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41746-020-00341-z"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-020-00341-z</pub-id>
          <pub-id pub-id-type="medline">33083571</pub-id>
          <pub-id pub-id-type="pii">341</pub-id>
          <pub-id pub-id-type="pmcid">PMC7567861</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref73">
        <label>73</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tayefi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ngo</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Chomutare</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Dalianis</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Salvi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Budrionis</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Godtliebsen</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Challenges and opportunities beyond structured data in analysis of electronic health records</article-title>
          <source>WIREs Comp Stat</source>
          <year>2021</year>
          <month>02</month>
          <day>14</day>
          <volume>13</volume>
          <issue>6</issue>
          <fpage>e1549</fpage>
          <pub-id pub-id-type="doi">10.1002/wics.1549</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref74">
        <label>74</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Steiger</surname>
              <given-names>Edgar</given-names>
            </name>
            <name name-style="western">
              <surname>Kroll</surname>
              <given-names>Lars Eric</given-names>
            </name>
          </person-group>
          <article-title>Pat2Vec</article-title>
          <source>Hugging Face</source>
          <access-date>2023-03-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://huggingface.co/zidatasciencelab/Pat2Vec">https://huggingface.co/zidatasciencelab/Pat2Vec</ext-link>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
