<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR AI</journal-id>
      <journal-title>JMIR AI</journal-title>
      <issn pub-type="epub">2817-1705</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v2i1e44835</article-id>
      <article-id pub-id-type="pmid">38875570</article-id>
      <article-id pub-id-type="doi">10.2196/44835</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Natural Language Processing for Clinical Laboratory Data Repository Systems: Implementation and Evaluation for Respiratory Viruses</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Malin</surname>
            <given-names>Bradley</given-names>
          </name>
        </contrib>
        <contrib contrib-type="editor">
          <name>
            <surname>El Emam</surname>
            <given-names>Khaled</given-names>
          </name>
        </contrib>
        <contrib contrib-type="editor">
          <name>
            <surname>Huo</surname>
            <given-names>Yuankai</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Zhao</surname>
            <given-names>Peng</given-names>
          </name>
          <degrees>PhD</degrees>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Teles</surname>
            <given-names>Ariel</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Dolatabadi</surname>
            <given-names>Elham</given-names>
          </name>
          <degrees>BSc, MSc, PhD</degrees>
          <xref rid="aff01" ref-type="aff">1</xref>
          <address>
            <institution>Vector Institute</institution>
            <addr-line>661 University Ave</addr-line>
            <addr-line>Toronto, ON, M5G 1M1</addr-line>
            <country>Canada</country>
            <phone>1 6477069756</phone>
            <email>elham.dolatabadi@gmail.com</email>
          </address>
          <xref rid="aff02" ref-type="aff">2</xref>
          <xref rid="aff03" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2236-2611</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>Branson</given-names>
          </name>
          <degrees>BHS, MSc</degrees>
          <xref rid="aff04" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4224-0753</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Buchan</surname>
            <given-names>Sarah A</given-names>
          </name>
          <degrees>MSc, PhD</degrees>
          <xref rid="aff03" ref-type="aff">3</xref>
          <xref rid="aff04" ref-type="aff">4</xref>
          <xref rid="aff05" ref-type="aff">5</xref>
          <xref rid="aff06" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5286-8974</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Austin</surname>
            <given-names>Alex Marchand</given-names>
          </name>
          <degrees>BSc, MSc</degrees>
          <xref rid="aff04" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5932-2037</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Azimaee</surname>
            <given-names>Mahmoud</given-names>
          </name>
          <degrees>BSc</degrees>
          <xref rid="aff03" ref-type="aff">3</xref>
          <xref rid="aff04" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6704-5129</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>McGeer</surname>
            <given-names>Allison</given-names>
          </name>
          <degrees>MSc, MD, PhD</degrees>
          <xref rid="aff03" ref-type="aff">3</xref>
          <xref rid="aff06" ref-type="aff">6</xref>
          <xref rid="aff07" ref-type="aff">7</xref>
          <xref rid="aff08" ref-type="aff">8</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5647-6137</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Mubareka</surname>
            <given-names>Samira</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff08" ref-type="aff">8</xref>
          <xref rid="aff09" ref-type="aff">9</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5012-2311</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Kwong</surname>
            <given-names>Jeffrey C</given-names>
          </name>
          <degrees>MSc, MD</degrees>
          <xref rid="aff04" ref-type="aff">4</xref>
          <xref rid="aff05" ref-type="aff">5</xref>
          <xref rid="aff06" ref-type="aff">6</xref>
          <xref rid="aff10" ref-type="aff">10</xref>
          <xref rid="aff11" ref-type="aff">11</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-7820-2046</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff01">
        <label>1</label>
        <institution>Vector Institute</institution>
        <addr-line>Toronto, ON</addr-line>
        <country>Canada</country>
      </aff>
      <aff id="aff02">
        <label>2</label>
        <institution>School of Health Policy and Management, Faculty of Health</institution>
        <institution>York University</institution>
        <addr-line>Toronto, ON</addr-line>
        <country>Canada</country>
      </aff>
      <aff id="aff03">
        <label>3</label>
        <institution>Institute of Health Policy, Management and Evaluation</institution>
        <institution>University of Toronto</institution>
        <addr-line>Toronto, ON</addr-line>
        <country>Canada</country>
      </aff>
      <aff id="aff04">
        <label>4</label>
        <institution>ICES</institution>
        <addr-line>Toronto, ON</addr-line>
        <country>Canada</country>
      </aff>
      <aff id="aff05">
        <label>5</label>
        <institution>Public Health Ontario</institution>
        <addr-line>Toronto, ON</addr-line>
        <country>Canada</country>
      </aff>
      <aff id="aff06">
        <label>6</label>
        <institution>Dalla Lana School of Public Health</institution>
        <institution>University of Toronto</institution>
        <addr-line>Toronto, ON</addr-line>
        <country>Canada</country>
      </aff>
      <aff id="aff07">
        <label>7</label>
        <institution>Sinai Health System</institution>
        <addr-line>Toronto, ON</addr-line>
        <country>Canada</country>
      </aff>
      <aff id="aff08">
        <label>8</label>
        <institution>Department of Laboratory Medicine and Pathobiology</institution>
        <institution>University of Toronto</institution>
        <addr-line>Toronto, ON</addr-line>
        <country>Canada</country>
      </aff>
      <aff id="aff09">
        <label>9</label>
        <institution>Sunnybrook Research Institute</institution>
        <addr-line>Toronto, ON</addr-line>
        <country>Canada</country>
      </aff>
      <aff id="aff10">
        <label>10</label>
        <institution>University Health Network</institution>
        <addr-line>Toronto, ON</addr-line>
        <country>Canada</country>
      </aff>
      <aff id="aff11">
        <label>11</label>
        <institution>Department of Family and Community Medicine</institution>
        <institution>University of Toronto</institution>
        <addr-line>Toronto, ON</addr-line>
        <country>Canada</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Elham Dolatabadi <email>elham.dolatabadi@gmail.com</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2023</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>6</day>
        <month>6</month>
        <year>2023</year>
      </pub-date>
      <volume>2</volume>
      <elocation-id>e44835</elocation-id>
      <history>
        <date date-type="received">
          <day>5</day>
          <month>12</month>
          <year>2022</year>
        </date>
        <date date-type="rev-request">
          <day>10</day>
          <month>3</month>
          <year>2023</year>
        </date>
        <date date-type="rev-recd">
          <day>31</day>
          <month>3</month>
          <year>2023</year>
        </date>
        <date date-type="accepted">
          <day>18</day>
          <month>4</month>
          <year>2023</year>
        </date>
      </history>
      <copyright-statement>©Elham Dolatabadi, Branson Chen, Sarah A Buchan, Alex Marchand Austin, Mahmoud Azimaee, Allison McGeer, Samira Mubareka, Jeffrey C Kwong. Originally published in JMIR AI (https://ai.jmir.org), 06.06.2023.</copyright-statement>
      <copyright-year>2023</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR AI, is properly cited. The complete bibliographic information, a link to the original publication on https://www.ai.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://ai.jmir.org/2023/1/e44835" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>With the growing volume and complexity of laboratory repositories, it has become tedious to parse unstructured data into structured and tabulated formats for secondary uses such as decision support, quality assurance, and outcome analysis. However, advances in natural language processing (NLP) approaches have enabled efficient and automated extraction of clinically meaningful medical concepts from unstructured reports.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>In this study, we aimed to determine the feasibility of using the NLP model for information extraction as an alternative approach to a time-consuming and operationally resource-intensive handcrafted rule-based tool. Therefore, we sought to develop and evaluate a deep learning–based NLP model to derive knowledge and extract information from text-based laboratory reports sourced from a provincial laboratory repository system.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>The NLP model, a hierarchical multilabel classifier, was trained on a corpus of laboratory reports covering testing for 14 different respiratory viruses and viral subtypes. The corpus includes 87,500 unique laboratory reports annotated by 8 subject matter experts (SMEs). The classification task involved assigning the laboratory reports to labels at 2 levels: 24 fine-grained labels in level 1 and 6 coarse-grained labels in level 2. A “label” also refers to the status of a specific virus or strain being tested or detected (eg, influenza A is detected). The model’s performance stability and variation were analyzed across all labels in the classification task. Additionally, the model's generalizability was evaluated internally and externally on various test sets.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Overall, the NLP model performed well on internal, out-of-time (pre–COVID-19), and external (different laboratories) test sets with microaveraged <italic>F</italic><sub>1</sub>-scores &#62;94% across all classes. Higher precision and recall scores with less variability were observed for the internal and pre–COVID-19 test sets. As expected, the model’s performance varied across categories and virus types due to the imbalanced nature of the corpus and sample sizes per class. There were intrinsically fewer classes of viruses being detected than those tested; therefore, the model’s performance (lowest <italic>F</italic><sub>1</sub>-score of 57%) was noticeably lower in the detected cases.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>We demonstrated that deep learning–based NLP models are promising solutions for information extraction from text-based laboratory reports. These approaches enable scalable, timely, and practical access to high-quality and encoded laboratory data if integrated into laboratory information system repositories.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>health</kwd>
        <kwd>informatics</kwd>
        <kwd>natural language processing</kwd>
        <kwd>knowledge extraction</kwd>
        <kwd>electronic health record</kwd>
        <kwd>EHR</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Clinical laboratory data account for a large proportion of data stored in electronic health record systems worldwide and present a wealth of information vital for evidence-based decision-making and public health improvement [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. Laboratory information systems record, manage, and store laboratory test data to facilitate reporting to clinicians and jurisdictional laboratory information repositories [<xref ref-type="bibr" rid="ref3">3</xref>]. These repositories often include test orders and results from various laboratory service providers, such as hospitals, public health agencies, and private companies, and are populated as part of clinical care.</p>
      <p>Several factors limit the secondary use of laboratory data for other purposes. The most important are concerns about the quality of the data, lack of standardization, and difficulty extracting the needed information [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>]. Laboratory data vary over time due to evolving standards of care and changing population demographics. Furthermore, specific categories of laboratory data are reported as free text in an unstructured format with no standard vocabulary in the actual contents, which adds more complexity for their secondary uses [<xref ref-type="bibr" rid="ref1">1</xref>]. Therefore, efforts are needed to eliminate redundancies, extract the necessary information, and derive accurate interpretations from laboratory data.</p>
      <p>Our institute, ICES, has developed a specific information extraction workflow to manage the interpretation of a large volume of provincial clinical laboratory results, as shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>. The workflow, called a semi–rule-based workflow, relies on time-consuming and operationally resource-intensive approaches, including a library of rule-based and handcrafted tools. These tools are explicitly programmed for various laboratory result categories and must be refined continually. To address challenges with our existing semi–rule-based workflow and automate the exhaustive information retrieval task, we built a deep learning–based natural language processing (NLP) tool. The objective of this study was to assess the feasibility of our deep learning–based NLP model and evaluate its performance relative to the semi–rule-based workflow.</p>
      <p>The development of NLP methods is essential to automatically transform laboratory reports into a structured representation that scales data usability for research, quality improvement, and clinical purposes [<xref ref-type="bibr" rid="ref6">6</xref>-<xref ref-type="bibr" rid="ref12">12</xref>]. NLP enables automated extraction of information, and its use in the clinical domain is growing, with increasing uptake in various applications such as biomedical named entity recognition [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>], summarization [<xref ref-type="bibr" rid="ref10">10</xref>], and clinical prediction tasks [<xref ref-type="bibr" rid="ref9">9</xref>]. More recently, deep learning approaches such as convolutional neural networks, recurrent neural networks (RNNs), and RNN variants such as bidirectional long short-term memory (Bi-LSTM) have been successfully applied to clinical NLP tasks [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref13">13</xref>-<xref ref-type="bibr" rid="ref16">16</xref>]. They are now considered the baseline techniques for various information extraction tasks [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref17">17</xref>-<xref ref-type="bibr" rid="ref20">20</xref>].</p>
      <p>In this study, we focused on automating the retrieval of information related to respiratory viruses from the laboratory repository of Ontario, Canada’s most populous province. Respiratory viruses account for a substantial burden of disease globally [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref22">22</xref>], causing both respiratory and nonrespiratory illnesses [<xref ref-type="bibr" rid="ref23">23</xref>]. It is impossible to distinguish which respiratory virus is causing infection based on clinical examination alone, necessitating laboratory testing for confirmation. We sought to (1) implement a deep learning–based NLP predictive model to extract respiratory virus information from the laboratory repository and (2) evaluate the generalizability and robustness of predictions (extracted information) across different categories of respiratory viruses and test sets. Our study findings can inform public health practitioners and researchers about using NLP approaches to empower and facilitate access and retrieval of information from a collection of text-based laboratory reports without any time-consuming handcrafted rule-based approaches. This can facilitate the development of a scalable and easily deployable automated information extraction tool.</p>
      <fig id="figure1" position="float">
        <label>Figure 1</label>
        <caption>
          <p>Semi–rule-based workflow versus fully automated deep learning natural language processing (NLP) approach. Semi–rule-based relies on time-consuming and operationally resource-intensive approaches for the information extraction task. The corpus was derived from the Ontario Laboratories Information System (OLIS). Following basic text-cleaning steps, around 87,500 unique laboratory reports were collected and included in our corpus to be used in parallel by both semi–rule-based and deep learning NLP approaches. Semi–rule-based workflow is a multistep procedure where all the unique reports were grouped by Logical Observation Identifiers Names and Codes (LOINC), year, and location in the first step. In the second step, subject matter experts (SMEs) created a list of dictionaries for terms related to the different viruses and strains and a set of if-then-else rules to generate interpretations and extract information from each laboratory report. The dictionaries and if-then-else rules were packaged as a Python library called the rule-based text parser. Finally, the parser was improved based on inputs from 3 SMEs in an iterative manner.</p>
        </caption>
        <graphic xlink:href="ai_v2i1e44835_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Study Design</title>
        <p>The data set used in this study was a collection of laboratory reports that covered testing for 14 different respiratory viruses and viral subtypes (<xref ref-type="table" rid="table1">Table 1</xref>), most of which were in free-text form. These text-based reports required cleaning, parsing, and encoding.</p>
        <p>The data set was derived from the Ontario Laboratories Information System (OLIS). OLIS has over 100 contributors, which comprise hospital, commercial, and public health laboratories, adding to the complexity and variability of the clinical data. These data were analyzed at ICES.</p>
        <p>The automated encoding of laboratory testing reports into respiratory viruses is framed as a multilabel hierarchical classification task to address the needs of knowledge users in our institute in distinguishing respiratory viruses. According to our users, information at 2 resolution levels is needed: high and low. Therefore, we defined 2 levels of a classification hierarchy, and at each level, the classification was multilabel. Each input text sequence was assigned to a nonempty subset of various labels, as shown in <xref rid="figure2" ref-type="fig">Figure 2</xref>. In the first level of the hierarchy, the classifier assigned outputs to 24 mutually nonexclusive fine-grained labels. The fine-grained labels were reassigned to 6 coarse-grained sets of labels in the second level of the classification hierarchy. In this work, “sequence” refers to the input laboratory reports to the NLP model, which may be single or several sentences. A “label” also refers to a status of a specific virus or strain being tested or detected.</p>
        <p>To summarize, the information extraction for an input text sequence involved retrieving virus types and identifying their status as being tested and/or detected. <xref rid="figure2" ref-type="fig">Figure 2</xref> illustrates a running example of the input and output of the deep learning–based NLP model.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Details of the respiratory viruses embedded in text-based laboratory reports derived from the Ontario Laboratories Information System (OLIS). Specimens may be tested for 1 or more of the following viruses: influenza, RSV<sup>a</sup>, adenoviruses, seasonal coronaviruses, enterovirus/rhinoviruses, parainfluenza viruses, HMV<sup>b</sup>, and bocavirus<sup>c</sup>.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="370"/>
            <col width="250"/>
            <col width="200"/>
            <col width="180"/>
            <thead>
              <tr valign="top">
                <td>Viruses</td>
                <td>Mention counts<sup>d</sup>,<break/>n (%)</td>
                <td>Tested<sup>e</sup>,<break/>n (%)</td>
                <td>Detected<sup>f</sup>,<break/>n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Adenovirus</td>
                <td>21,614 (7)</td>
                <td>45 (6)</td>
                <td>2 (1)</td>
              </tr>
              <tr valign="top">
                <td>Bocavirus</td>
                <td>5112 (2)</td>
                <td>96 (13)</td>
                <td>5 (3)</td>
              </tr>
              <tr valign="top">
                <td>Coronavirus (seasonal)</td>
                <td>9128 (3)</td>
                <td>95 (13)</td>
                <td>9 (5)</td>
              </tr>
              <tr valign="top">
                <td>Any influenza</td>
                <td>49,282 (16)</td>
                <td>78 (11)</td>
                <td>35 (20)</td>
              </tr>
              <tr valign="top">
                <td>Influenza A</td>
                <td>44,753 (15)</td>
                <td>80 (11)</td>
                <td>30 (18)</td>
              </tr>
              <tr valign="top">
                <td>Influenza A H1</td>
                <td>6797 (2)</td>
                <td>N/A<sup>g</sup></td>
                <td>17 (10)</td>
              </tr>
              <tr valign="top">
                <td>Influenza A H3</td>
                <td>9929 (3)</td>
                <td>N/A</td>
                <td>18 (10)</td>
              </tr>
              <tr valign="top">
                <td>Influenza B</td>
                <td>40,840 (13)</td>
                <td>78 (11)</td>
                <td>12 (7)</td>
              </tr>
              <tr valign="top">
                <td>Enterovirus/rhinovirus</td>
                <td>13,262 (4)</td>
                <td>92 (13)</td>
                <td>19 (11)</td>
              </tr>
              <tr valign="top">
                <td>HMV</td>
                <td>21,194 (7)</td>
                <td>46 (6)</td>
                <td>3 (2)</td>
              </tr>
              <tr valign="top">
                <td>Parainfluenza</td>
                <td>21,584 (7)</td>
                <td>46 (6)</td>
                <td>4 (2)</td>
              </tr>
              <tr valign="top">
                <td>Any RSV</td>
                <td>38,080 (12)</td>
                <td>68 (9)</td>
                <td>11 (6)</td>
              </tr>
              <tr valign="top">
                <td>RSV A</td>
                <td>11,227 (4)</td>
                <td>N/A</td>
                <td>2 (1)</td>
              </tr>
              <tr valign="top">
                <td>RSV B</td>
                <td>11,094 (4)</td>
                <td>N/A</td>
                <td>3 (2)</td>
              </tr>
              <tr valign="top">
                <td>Total</td>
                <td>303,896 (100)</td>
                <td>724 (100)</td>
                <td>170 (100)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>RSV: respiratory syncytial virus.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>HMV: human metapneumovirus.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>The testing modalities employed include single and multiplex polymerase chain reaction (PCR), direct fluorescent antibody, viral culture, and enzyme immunoassay rapid antigen tests. Repeated testing may involve multiple laboratories and testing modalities.</p>
            </fn>
            <fn id="table1fn4">
              <p><sup>d</sup>Represents the counts of specific virus terms from all the distinct laboratory reports (unique sequences). It does not provide any clinical information regarding the prevalence of the aforementioned viruses in Ontario.</p>
            </fn>
            <fn id="table1fn5">
              <p><sup>e</sup>Represents the proportion of mentions flagged as tested by the parser.</p>
            </fn>
            <fn id="table1fn6">
              <p><sup>f</sup>Represents the proportion of mentions flagged as positively detected by the parser. Note that tested and detected are not mutually exclusive; we first determined whether it was tested for (ie, has a result) and then flagged it as detected if the result is positive. Detected is a subset of the tested.</p>
            </fn>
            <fn id="table1fn7">
              <p><sup>g</sup>N/A: not applicable. Note that the subtypes of influenza A and RSV were only analyzed for detection but not testing, as the scope of the planned analyses for using the respiratory virus data was primarily focused on the larger virus categories.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>The fully automated deep learning–based natural language processing (NLP) approach is a hierarchical-based multilabel classification task that retrieves virus (or strain) types and identifies their status as being tested and/or detected. Note that a sequence refers to the input laboratory reports to the NLP approach, which may be a single or several sentences. A label also refers to the status of a specific virus or strain (tested or detected). “influenza is tested” implies it was tested for any influenza type; however, the total number of “influenza is tested” is greater than the total number of “influenza A tested + influenza B tested” since not all influenza types are mentioned. The same applies to “influenza is detected” and “RSV is tested.” HMV: human metapneumovirus; NAAT: nucleic acid amplification test; PCR: polymerase chain reaction; RSV: respiratory syncytial virus.</p>
          </caption>
          <graphic xlink:href="ai_v2i1e44835_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Corpus Development Description</title>
        <sec>
          <title>About OLIS</title>
          <p>To create the corpus for this study, over a million observations corresponding to 99 unique Logical Observation Identifiers Names and Codes (LOINC) were pulled from OLIS, and the text-based laboratory results were extracted from the observations. OLIS was created and is managed by Ontario Health, from whom ICES receives an ongoing data feed. At the time of writing this paper, the OLIS data held at ICES consist of &#62;9000 unique LOINC codes and &#62;5 billion laboratory observations across 150 laboratory test centers in Ontario. As such, the clinical laboratory data have considerable complexity and variability.</p>
        </sec>
        <sec>
          <title>Development of the Ground Truth</title>
          <p>In this study, we leveraged the semi–rule-based workflow, an information extraction workflow relying on a rule-based and handcrafted tools library, to create ground truth for the deep learning model. A group of 8 SMEs was engaged in performing the required tasks in the workflow; they comprised 2 infectious disease epidemiologists (authors JCK and SAB), 2 infectious disease microbiologists (AM and SM), a genomic specialist (AMA), a research methodologist (MA), a data analyst (BC), and a machine learning scientist (ED). These tasks included basic text cleaning, quality checking, and rule-based algorithm development for interpreting reports, as shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>. In our institute, LOINC are mainly used to filter OLIS observations into relevant groupings (eg, respiratory viruses) and not for encoding and interpretation since they are not always used appropriately by those entering the data into OLIS. Consequently, the SMEs identified a list of 99 LOINC related to respiratory viruses, and all the laboratory reports in OLIS corresponding to these LOINC were retrieved. The workflow consists of 3 tasks, which are detailed in the subsequent paragraphs.</p>
          <p>First, the data analyst and data scientist (authors BC and ED) scanned the text strings. After performing basic text cleaning (eg, removal of punctuation and stop words, case normalization, lemmatization, and stemming) and removing duplicates, they created a meaningful list of 87,500 unique laboratory reports.</p>
          <p>Next, the unique reports were grouped by laboratory and facility names, LOINC, and year. Then, 3 SMEs, including 2 analysts and an infectious disease specialist, manually reviewed multiple samples per group and created a knowledge base and sets of if-then-else rules to generate interpretations for each laboratory report. Specifically, the knowledge base consisted of dictionaries for terms related to the different viruses and strains. The if-then-else rules provided instructions for grouping virus terms with their respective results. The knowledge base and rules were packaged as a Python library, which we refer to in this study as the rule-based text parser.</p>
          <p>Following the initial development of the rule-based text parser, it was improved based on inputs from 3 other SMEs in an iterative manner. The text parser was applied to the entire corpus to generate annotations at each iteration. Next, the data analyst manually reviewed the interpretations and flagged unclear results to be reviewed by SMEs at another iteration. In addition, a small random sample of unflagged test results was provided to SMEs to be reviewed at this iteration. The SMEs subsequently reviewed the list and provided new rules to be added to the text parser. This procedure was repeated until there were no more flagged test results.</p>
        </sec>
      </sec>
      <sec>
        <title>Model Development and Evaluation</title>
        <sec>
          <title>NLP Model Description</title>
          <p>The deep learning–based NLP model consisted of 3 components that were trained jointly: the word embedding layer, the Bi-LSTM layer, and the output layer. The word embedding layer computed a vector representation of each word in the text as a combination of a character-based representation learning model [<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref25">25</xref>] and word vectors initialized with pretrained global vectors (GloVe) embeddings [<xref ref-type="bibr" rid="ref26">26</xref>]. The embedding layer was coupled with a Bi-LSTM on top of it to generate conceptually and contextually meaningful representations of words. An output layer of a size equal to the number of distinct labels was placed on top of Bi-LSTM, and the last hidden state of the Bi-LSTM was projected into the output layer.</p>
        </sec>
        <sec>
          <title>Model Evaluation</title>
          <p>The model’s robustness and generalizability were evaluated internally and externally on various test sets, as shown in <xref ref-type="table" rid="table2">Table 2</xref>. The internal test set used for model training was a randomly sampled subset representing 10% (n=6719) of the laboratory reports from OLIS from 2007 to 2018. The performance of the model was also evaluated on 2 out-of-time test sets, including samples from an entirely different time period: (1) a large pre–COVID-19 (2019) sample and (2) a small post–COVID-19 (2020) sample. A separate test set, denoted as the external test set, included samples up to 2019 from 2 separate laboratories (testing sites not included in the development of the model) and was used to assess the external generalizability of the model. <italic>F</italic><sub>1</sub>-scores, along with precision and recall scores, were calculated for the model’s predictions. A 2-tailed paired <italic>t</italic> test was used to determine whether there was a statistically significant difference in the <italic>F</italic><sub>1</sub>-scores between classes and test sets. In addition, 95% CIs were calculated for the precision and recall scores to quantify the uncertainty of the model's estimates.</p>
          <table-wrap position="float" id="table2">
            <label>Table 2</label>
            <caption>
              <p>Data set statistics for laboratory descriptions of the development and test sets.</p>
            </caption>
            <table border="1" rules="groups" cellpadding="5" frame="hsides" width="1000" cellspacing="0">
              <col width="30"/>
              <col width="130"/>
              <col width="120"/>
              <col width="120"/>
              <col width="120"/>
              <col width="120"/>
              <col width="120"/>
              <col width="120"/>
              <col width="120"/>
              <thead>
                <tr valign="top">
                  <td colspan="2">Cohorts</td>
                  <td>Sequences<sup>a</sup>, n (%)</td>
                  <td colspan="2">Any influenza virus<sup>b</sup></td>
                  <td colspan="2">Any RSV<sup>c</sup> virus</td>
                  <td colspan="2">Any virus</td>
                </tr>
                <tr valign="top">
                  <td colspan="2">Total</td>
                  <td>87,411 (100)</td>
                  <td>Detected, n (%)</td>
                  <td>Tested, n (%)</td>
                  <td>Detected, n (%)</td>
                  <td>Tested, n (%)</td>
                  <td>Detected, n (%)</td>
                  <td>Tested, n (%)</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td colspan="9">
                    <bold>Development set (2009-2018)</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Training set</td>
                  <td>60,471 (69)</td>
                  <td>13,792 (16)</td>
                  <td>35,292 (40)</td>
                  <td>3959 (4)</td>
                  <td>27,196 (31)</td>
                  <td>22,284 (25)</td>
                  <td>40,652 (46)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Internal test set</td>
                  <td>6719 (8)</td>
                  <td>1604 (2)</td>
                  <td>3941 (4)</td>
                  <td>428 (0.5)</td>
                  <td>3009 (3)</td>
                  <td>2541 (3)</td>
                  <td>4534 (5)</td>
                </tr>
                <tr valign="top">
                  <td colspan="9">
                    <bold>Out-of-time test sets</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Pre–COVID-19 (2019)</td>
                  <td>15,908 (18)</td>
                  <td>3019 (3)</td>
                  <td>6903 (8)</td>
                  <td>706 (0.8)</td>
                  <td>5957 (7)</td>
                  <td>4745 (5)</td>
                  <td>8643 (10)</td>
                </tr>
                <tr valign="top">
                  <td/>
                  <td>Post–COVID-19 (2020)</td>
                  <td>100 (0.1)</td>
                  <td>N/A<sup>d</sup></td>
                  <td>11 (0.01)</td>
                  <td>&#60;6 (0.006)</td>
                  <td>11 (0.01)</td>
                  <td>&#60;6 (0.006)</td>
                  <td>27 (0.03)</td>
                </tr>
                <tr valign="top">
                  <td colspan="2">External test set (2009-2018)</td>
                  <td>4213 (5)</td>
                  <td>864 (1)</td>
                  <td>3020 (3)</td>
                  <td>261 (0.2)</td>
                  <td>2546 (3)</td>
                  <td>1431 (2)</td>
                  <td>3237 (4)</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table2fn1">
                <p><sup>a</sup>Represents the counts of unique sequences; a sequence refers to the input laboratory reports to the NLP model, which may be a single sentence or several sentences.</p>
              </fn>
              <fn id="table2fn2">
                <p><sup>b</sup>Detected and tested represent the aggregation of the proportion of any mentions of the virus terms from the total unique sequences in the data set.</p>
              </fn>
              <fn id="table2fn3">
                <p><sup>c</sup>RSV: respiratory syncytial virus.</p>
              </fn>
              <fn id="table2fn4">
                <p><sup>d</sup>N/A: not applicable.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>The use of the data in this study was approved by the ICES Privacy and Legal Office. Projects that solely use data collected by ICES under section 45 of Ontario’s Personal Health Information Protection Act (PHIPA) are exempt from research ethics board review. Section 45 of the PHIPA authorizes ICES to collect personal health information, without consent for the purpose of analyzing or compiling statistical information concerning the management, evaluation, monitoring, and allocation of resources to or planning for the health system.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>The development corpus, including training and test sets, included 87,500 sequences involving ~5 million tokens. The summary statistics for the data sets are shown in <xref ref-type="table" rid="table2">Table 2</xref>. The NLP model was implemented in TensorFlow on an NVIDIA Tesla (NVIDIA) graphics processing unit, and Adam was used as the optimization algorithm (more details in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). The maximum sequence length was fixed to 400 words. The model was trained several times with random initialization on the development corpus, and the results of the top 10 best-performing models on the test sets are presented in this paper. The results for the fine-grained classification in the first level of the hierarchy are presented in <xref ref-type="table" rid="table3">Table 3</xref> and aggregated by microaveraging across the 24 fine-grained labels. Detailed performance for each label is also shown in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>. The <italic>F</italic><sub>1</sub>-score performance of the model in the second level of the hierarchy, coarse-grained multilabel classification, for “any influenza,” “any RSV” (respiratory syncytial virus), and “any virus” is shown in <xref ref-type="table" rid="table3">Table 3</xref>. In addition, the variation in the model’s precision and recall scores is shown using bar plots with 95% CIs in <xref rid="figure3" ref-type="fig">Figure 3</xref>.</p>
      <p>As expected, the performance on the internal test set was better than the out-of-time (pre–COVID-19) and external test sets. In this regard, the <italic>F</italic><sub>1</sub>-score results of the test sets were compared, and noticeable differences were observed between the pairs of internal and out-of-time (pre–COVID-19) test sets, internal and out-of-time (post–COVID-19) test sets, and internal and external test sets. The out-of-time (post–COVID-19) test set was a small and imbalanced sample, including 100 sequences with &#60;6 mentions of any virus as being detected. The sample included 12 sequences labeled as being tested for coronavirus, and our model correctly classified them with an <italic>F</italic><sub>1</sub>-score of 0.67. Regarding the degree of uncertainty in the estimates, fewer variations in precision and recall scores are observed for the internal and out-of-time test sets (pre–COVID-19). On the contrary, the estimates on the out-of-time (post–COVID-19) and external test sets have larger CIs.</p>
      <p>In general, the models’ estimates on any test sets were variable across classes with varying degrees of uncertainty. The averaged <italic>F</italic><sub>1</sub>-scores of the estimates for both fine-grained (microaveraged) and “coarse-grained any virus” classes were above 90% on the internal test set. The <italic>F</italic><sub>1</sub>-score for the “coarse-grained any influenza detected” on all test sets was above 91%. Overall, the performance for coarse-grained detected classes was lower than for coarse-grained tested classes. Among the detected classes, the performance for “any influenza virus” was evidently higher than that for “any RSV virus.” The same result was observed among the tested classes between “any influenza virus” and “any RSV virus.” Comparatively, larger CIs were observed for the “coarse-grained any RSV detected” estimates.</p>
      <table-wrap position="float" id="table3">
        <label>Table 3</label>
        <caption>
          <p>The prediction results (<italic>F</italic><sub>1</sub>-score) of the top 10 best-performing models on the in-time, out-of-time, and external test sets. The fine-grained results are aggregated by microaveraging across 24 fine-grained labels.</p>
        </caption>
        <table border="1" rules="groups" cellpadding="5" frame="hsides" width="1000" cellspacing="0">
          <col width="30"/>
          <col width="250"/>
          <col width="190"/>
          <col width="190"/>
          <col width="190"/>
          <col width="150"/>
          <thead>
            <tr valign="top">
              <td colspan="2">Variables</td>
              <td>Internal test set</td>
              <td colspan="2">Out-of-time test set<sup>a</sup></td>
              <td>External test set</td>
            </tr>
            <tr valign="top">
              <td colspan="2"/>
              <td/>
              <td>(Pre–COVID-19)</td>
              <td>(Post–COVID-19)</td>
              <td/>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td colspan="2">Fine-grained microaveraged, mean (SD)</td>
              <td>97.3 (0.25)</td>
              <td>94.31 (0.59)</td>
              <td>60.45 (7.99)</td>
              <td>96.23 (0.38)</td>
            </tr>
            <tr valign="top">
              <td colspan="6">
                <bold>Coarse-grained any influenza virus, mean (SD)</bold>
              </td>
            </tr>
            <tr valign="top">
              <td/>
              <td>Detected<sup>b</sup></td>
              <td>97.64 (0.28)</td>
              <td>94.47 (1.04)</td>
              <td>N/A<sup>c</sup></td>
              <td>91.11 (2.14)</td>
            </tr>
            <tr valign="top">
              <td/>
              <td>Tested<sup>b</sup></td>
              <td>98.71 (0.15)</td>
              <td>97.26 (0.45)</td>
              <td>69.8 (4.43)</td>
              <td>98.94 (0.1)</td>
            </tr>
            <tr valign="top">
              <td colspan="6">
                <bold>Coarse-grained any RSV<sup>d</sup>, mean (SD)</bold>
              </td>
            </tr>
            <tr valign="top">
              <td/>
              <td>Detected</td>
              <td>90.94 (1.7)</td>
              <td>81.56 (3.63)</td>
              <td>48.33 (44.76)</td>
              <td>57.68 (12.53)</td>
            </tr>
            <tr valign="top">
              <td/>
              <td>Tested</td>
              <td>98.16 (0.34)</td>
              <td>96.18 (0.95)</td>
              <td>95.6 (5.69)</td>
              <td>98.02 (0.47)</td>
            </tr>
            <tr valign="top">
              <td colspan="6">
                <bold>Coarse-grained any virus, mean (SD)</bold>
              </td>
            </tr>
            <tr valign="top">
              <td/>
              <td>Detected</td>
              <td>95.01 (1)</td>
              <td>92.31 (1.59)</td>
              <td>31.71 (9.44)</td>
              <td>82.83 (3.27)</td>
            </tr>
            <tr valign="top">
              <td/>
              <td>Tested</td>
              <td>98.4 (0.17)</td>
              <td>96.3 (0.35)</td>
              <td>75.87 (4.82)</td>
              <td>98.59 (0.2)</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table3fn1">
            <p><sup>a</sup>The out-of-time test set (post–COVID-19) is a very small and imbalanced sample, including only 100 sequences with &#60;6 mentions of any virus detected.</p>
          </fn>
          <fn id="table3fn2">
            <p><sup>b</sup>Detected and tested represent the aggregation of the proportion of any mentions of the virus terms from the total unique sequences in the data set.</p>
          </fn>
          <fn id="table3fn3">
            <p><sup>c</sup>N/A: not applicable.</p>
          </fn>
          <fn id="table3fn4">
            <p><sup>d</sup>RSV: respiratory syncytial virus.</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
      <fig id="figure3" position="float">
        <label>Figure 3</label>
        <caption>
          <p>The precision and recall scores of the predictions of the top 10 best-performing models with 95% CIs. The fine-grained results are aggregated by microaveraging across 24 fine-grained labels. RSV: respiratory syncytial virus.</p>
        </caption>
        <graphic xlink:href="ai_v2i1e44835_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>In this study, we demonstrated an implementation and evaluation of an NLP model for an automated and reductive information extraction task in a province-wide laboratory data repository. Our results suggest that the NLP model is a promising approach for information extraction from text-based laboratory reports as an alternative method to address the time-consuming and operationally resource-intensive nature of handcrafted rule-based models.</p>
      </sec>
      <sec>
        <title>Overview of Model Findings</title>
        <sec>
          <title>Generalization Across Various Test Sets</title>
          <p>Overall, the NLP solution, which was a hierarchical multilabel classifier, performed well on the internal, out-of-time (pre–COVID-19), and external (different laboratories) test sets. Except for the internal test sets, the other test sets were sourced from either a more recent time period or other laboratory sites, but the model was able to generalize well with microaveraged <italic>F</italic><sub>1</sub>-score &#62;94% across all classes. The performance of the model on the other out-of-time (post–COVID-19) test set was satisfactory; however, due to its small sample size with many underrepresented classes, it was not possible to draw any conclusion. The out-of-time (post–COVID-19) test set was pulled from the 2020 cohort to simulate a nonstationary production environment for observation.</p>
        </sec>
        <sec>
          <title>Stability and Performance Variation Between Classes</title>
          <p>In general, the model’s performance on any test sets was variable across classes and virus types due to the imbalanced nature of the corpus and sample sizes per class. There were intrinsically fewer classes of viruses detected compared with those tested. Therefore, the model’s performance was noticeably lower in the “detected” cases. Among the detected cases, the lowest performance was observed for RSV, and the highest performance among the tested cases was observed for influenza. Moreover, more considerable variations were observed for the positive predictive and sensitivity values of the detected classes, particularly for the “any RSV virus detected” class.</p>
        </sec>
      </sec>
      <sec>
        <title>Comparison With Prior Work</title>
        <p>Deep learning–based NLP approaches have demonstrated efficacy in many clinical NLP tasks and have thoroughly permeated the informatics community. The existing body of literature has mainly focused on using deep learning models to extract and interpret cancer-related clinical concepts [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref28">28</xref>] from free text or other clinically meaningful entities from radiology reports or hospital notes [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. At the time of writing this paper, only 1 study has explored the use of an NLP system, Topaz, for the automated extraction and classification of influenza-related terms from text emergency reports [<xref ref-type="bibr" rid="ref29">29</xref>-<xref ref-type="bibr" rid="ref31">31</xref>]. To our knowledge, our study is the first to explore using deep learning models for efficient processing and extraction of clinically meaningful knowledge pertaining to respiratory viruses from a laboratory repository.</p>
        <p>One strength of the NLP approach used in this study is its scalability for various text-based laboratory scenarios. As the size and complexity of laboratory data grow, so does the need for scalable and reusable tools for automated extraction of knowledge from vast amounts of clinical notes and quick generalization from 1 task to another. Manual processing of laboratory reports severely limits the utilization of rich information embedded in the data repositories and makes the process of data cleaning and quality improvement prohibitively expensive. Usually, the rules learned from cleaning a single collection of laboratory reports show little generalizability toward other collections. On the other hand, deep learning–based NLP algorithms are well poised to scale the information extraction process. Although building deep learning–based NLP models is computationally intensive and memory demanding, the benefit-to-cost ratio of these models in clinical settings will continue to increase.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>Although this deep learning model promises great potential for digitized health data, putting the model into production and prospectively validating operational data is as crucial as model building and a critical step in assessing and ensuring its operational effectiveness. However, we expect the model’s performance to deteriorate as it goes into production, potentially impacting data quality. Moving forward, we plan to run a silent-period production validation to further prospectively explore the model’s performance. During the silent period, our model will be integrated into the data quality and management workflow for the laboratory data repository, and the outputs will be internally validated in a fashion that would avoid exposure to data users. We also plan to run rigorous evaluation and continuous refinement of the model in the silent period to assess its performance better before it enters production. Transformers heralded a new era in the NLP field and have been shown to be very successful in many tasks. Our future direction includes improving the performance of our NLP pipeline by adding transformer models.</p>
        <p>Another significant limitation of this study is that the model was only trained on respiratory virus laboratory reports. Even within that collection, some categories were naturally underrepresented, which impacted the model’s generalizability. Therefore, during the silent period, more records from a diverse set of laboratory reports from various categories will be annotated and made available to the model, and the model will be updated accordingly. Finally, the model lacks explainability, which could limit the adoption of our deep learning–based models in future applications. Therefore, we plan to develop parallel pipelines that help explain the representations of the laboratory reports and the classifier’s decision boundary.</p>
      </sec>
      <sec>
        <title>Conclusion</title>
        <p>The health industry is rapidly becoming digitized, and information extraction is a promising method for researchers and clinicians seeking quick retrieval of information embedded in texts. This study described developing and validating a deep learning–based NLP approach to extract respiratory virus testing information from laboratory reports. We demonstrated that our system could classify and encode large volumes of text-based laboratory reports with high performance without any of the previous time-consuming handcrafted feature engineering approaches. Taken together, the findings of this study provide encouraging support that NLP-based information extraction could become an important component of laboratory information repositories to assist researchers, clinicians, and health care providers with their information and knowledge management tasks.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Details of hyperparameter tuning.</p>
        <media xlink:href="ai_v2i1e44835_app1.pdf" xlink:title="PDF File  (Adobe PDF File), 51 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Fine-grained classification results (F1-scores from the best performing model).</p>
        <media xlink:href="ai_v2i1e44835_app2.pdf" xlink:title="PDF File  (Adobe PDF File), 83 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">Bi-LSTM</term>
          <def>
            <p>bidirectional long short-term memory</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">GloVe</term>
          <def>
            <p>global vectors</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">LOINC</term>
          <def>
            <p>Logical Observation Identifiers Names and Codes</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">OLIS</term>
          <def>
            <p>Ontario Laboratories Information System</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">PHIPA</term>
          <def>
            <p>Personal Health Information Protection Act</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">RNN</term>
          <def>
            <p>recurrent neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">RSV</term>
          <def>
            <p>respiratory syncytial virus</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">SME</term>
          <def>
            <p>subject matter expert</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This study was a collaborative effort supported by the Vector Institute, an independent, not-for-profit corporation dedicated to research in the field of artificial intelligence, and ICES, an independent, nonprofit research organization that uses population-based health and social data to produce knowledge on a broad range of health care issues. Resources used in preparing this work were funded by an annual grant from the Ontario Ministry of Health (MOH) and the Ministry of Long-Term Care (MLTC). Parts of this material are based on data and information compiled and provided by the Ontario Ministry of Health. This work was also supported by a SickKids-Canadian Institutes of Health Research New Investigator Grant in Child and Youth Health (NI19-1065). The analyses, conclusions, opinions, and statements expressed herein are solely those of the authors and do not reflect those of the funding or data sources; no endorsement is intended or should be inferred.</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The data underlying this work are held securely in coded form at ICES and therefore cannot be shared publicly due to data privacy concerns and legal data sharing agreements between ICES and data providers (eg, health care organizations and the government). However, data access might be granted to those who meet prespecified criteria for confidential access (email: das@ices.on.ca).</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Abhyankar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Demner-Fushman</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>McDonald</surname>
              <given-names>CJ</given-names>
            </name>
          </person-group>
          <article-title>Standardizing clinical laboratory data for secondary use</article-title>
          <source>J Biomed Inform</source>
          <year>2012</year>
          <month>08</month>
          <volume>45</volume>
          <issue>4</issue>
          <fpage>642</fpage>
          <lpage>50</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(12)00065-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2012.04.012</pub-id>
          <pub-id pub-id-type="medline">22561944</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(12)00065-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC3419308</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kudler</surname>
              <given-names>NR</given-names>
            </name>
            <name name-style="western">
              <surname>Pantanowitz</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Overview of laboratory data tools available in a single electronic medical record</article-title>
          <source>J Pathol Inform</source>
          <year>2010</year>
          <month>05</month>
          <day>26</day>
          <volume>1</volume>
          <issue>1</issue>
          <fpage>3</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2153-3539(22)00095-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.4103/2153-3539.63824</pub-id>
          <pub-id pub-id-type="medline">20805960</pub-id>
          <pub-id pub-id-type="pii">S2153-3539(22)00095-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC2929542</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Clarke</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Marzinke</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <source>Contemporary Practice in Clinical Chemistry</source>
          <year>2020</year>
          <month>06</month>
          <day>08</day>
          <publisher-loc>Cambridge, MA</publisher-loc>
          <publisher-name>Academic Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ross</surname>
              <given-names>MK</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Ohno-Machado</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>“Big Data” and the electronic health record</article-title>
          <source>Yearb Med Inform</source>
          <year>2018</year>
          <month>03</month>
          <day>05</day>
          <volume>23</volume>
          <issue>01</issue>
          <fpage>97</fpage>
          <lpage>104</lpage>
          <pub-id pub-id-type="doi">10.15265/iy-2014-0003</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jensen</surname>
              <given-names>PB</given-names>
            </name>
            <name name-style="western">
              <surname>Jensen</surname>
              <given-names>LJ</given-names>
            </name>
            <name name-style="western">
              <surname>Brunak</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Mining electronic health records: towards better research applications and clinical care</article-title>
          <source>Nat Rev Genet</source>
          <year>2012</year>
          <month>05</month>
          <day>02</day>
          <volume>13</volume>
          <issue>6</issue>
          <fpage>395</fpage>
          <lpage>405</lpage>
          <pub-id pub-id-type="doi">10.1038/nrg3208</pub-id>
          <pub-id pub-id-type="medline">22549152</pub-id>
          <pub-id pub-id-type="pii">nrg3208</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kuo</surname>
              <given-names>TT</given-names>
            </name>
            <name name-style="western">
              <surname>Rao</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Maehara</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Doan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chaparro</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Day</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Farcas</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ohno-Machado</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hsu</surname>
              <given-names>CN</given-names>
            </name>
          </person-group>
          <article-title>Ensembles of NLP tools for data element extraction from clinical notes</article-title>
          <year>2017</year>
          <conf-name>AMIA Annual Symposium</conf-name>
          <conf-date>November 16</conf-date>
          <conf-loc>Chicago, IL</conf-loc>
          <fpage>1880</fpage>
          <lpage>1889</lpage>
          <pub-id pub-id-type="doi">10.5281/zenodo.1491953</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Savova</surname>
              <given-names>GK</given-names>
            </name>
            <name name-style="western">
              <surname>Masanz</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ogren</surname>
              <given-names>PV</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sohn</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kipper-Schuler</surname>
              <given-names>KC</given-names>
            </name>
            <name name-style="western">
              <surname>Chute</surname>
              <given-names>CG</given-names>
            </name>
          </person-group>
          <article-title>Mayo clinical Text Analysis and Knowledge Extraction System (cTAKES): architecture, component evaluation and applications</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2010</year>
          <month>09</month>
          <day>01</day>
          <volume>17</volume>
          <issue>5</issue>
          <fpage>507</fpage>
          <lpage>513</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/20819853"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/jamia.2009.001560</pub-id>
          <pub-id pub-id-type="medline">20819853</pub-id>
          <pub-id pub-id-type="pii">17/5/507</pub-id>
          <pub-id pub-id-type="pmcid">PMC2995668</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Shagina</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lussier</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hripcsak</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Automated encoding of clinical documents based on natural language processing</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2004</year>
          <month>09</month>
          <day>01</day>
          <volume>11</volume>
          <issue>5</issue>
          <fpage>392</fpage>
          <lpage>402</lpage>
          <pub-id pub-id-type="doi">10.1197/jamia.m1552</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Altosaar</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ranganath</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>ClinicalBERT: modeling clinical notes and predicting hospital readmission</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online April 10, 2019.
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1904.05342"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.1904.05342</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kormilitzin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Vaci</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Nevado-Holgado</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Med7: A transferable clinical natural language processing model for electronic health records</article-title>
          <source>Artif Intell Med</source>
          <year>2021</year>
          <month>08</month>
          <volume>118</volume>
          <fpage>102086</fpage>
          <pub-id pub-id-type="doi">10.1016/j.artmed.2021.102086</pub-id>
          <pub-id pub-id-type="medline">34412834</pub-id>
          <pub-id pub-id-type="pii">S0933-3657(21)00079-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhi</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Clinical named entity recognition using deep learning models</article-title>
          <year>2017</year>
          <conf-name>AMIA Annual Symposium</conf-name>
          <conf-date>November 16</conf-date>
          <conf-loc>Washington, DC</conf-loc>
          <fpage>1812</fpage>
          <lpage>1819</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Si</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Enhancing clinical concept extraction with contextual embeddings</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2019</year>
          <month>11</month>
          <day>01</day>
          <volume>26</volume>
          <issue>11</issue>
          <fpage>1297</fpage>
          <lpage>1304</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/31265066"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocz096</pub-id>
          <pub-id pub-id-type="medline">31265066</pub-id>
          <pub-id pub-id-type="pii">5527248</pub-id>
          <pub-id pub-id-type="pmcid">PMC6798561</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Esteva</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Robicquet</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ramsundar</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Kuleshov</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>DePristo</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chou</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Cui</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Corrado</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Thrun</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Dean</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>A guide to deep learning in healthcare</article-title>
          <source>Nat Med</source>
          <year>2019</year>
          <month>01</month>
          <day>07</day>
          <volume>25</volume>
          <issue>1</issue>
          <fpage>24</fpage>
          <lpage>29</lpage>
          <pub-id pub-id-type="doi">10.1038/s41591-018-0316-z</pub-id>
          <pub-id pub-id-type="medline">30617335</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41591-018-0316-z</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Datta</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Du</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ji</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Si</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Soni</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Xiang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Deep learning in clinical natural language processing: a methodical review</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2020</year>
          <month>03</month>
          <day>01</day>
          <volume>27</volume>
          <issue>3</issue>
          <fpage>457</fpage>
          <lpage>470</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/31794016"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocz200</pub-id>
          <pub-id pub-id-type="medline">31794016</pub-id>
          <pub-id pub-id-type="pii">5651084</pub-id>
          <pub-id pub-id-type="pmcid">PMC7025365</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Boag</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Wacome</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Naumann</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Rumshisky</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>CliNER: A lightweight tool for clinical named entity recognition</article-title>
          <year>2015</year>
          <conf-name>AMIA Joint Summits on Clinical Research Informatics</conf-name>
          <conf-date>March 23-27</conf-date>
          <conf-loc>San Francisco, CA</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sugimoto</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Takeda</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Oh</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wada</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Konishi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Yamahata</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Manabe</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tomiyama</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Matsunaga</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Nakanishi</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Matsumura</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Extracting clinical terms from radiology reports with deep learning</article-title>
          <source>J Biomed Inform</source>
          <year>2021</year>
          <month>04</month>
          <volume>116</volume>
          <fpage>103729</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(21)00058-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2021.103729</pub-id>
          <pub-id pub-id-type="medline">33711545</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(21)00058-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Si</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>A frame-based NLP system for cancer-related information extraction</article-title>
          <year>2018</year>
          <conf-name>AMIA Annual Symposium</conf-name>
          <conf-date>November 3-7</conf-date>
          <conf-loc>San Francisco, CA</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://pubmed.ncbi.nlm.nih.gov/30815198/"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Banerjee</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Ling</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Hasan</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Langlotz</surname>
              <given-names>CP</given-names>
            </name>
            <name name-style="western">
              <surname>Moradzadeh</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Chapman</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Amrhein</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Mong</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Rubin</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Farri</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Lungren</surname>
              <given-names>MP</given-names>
            </name>
          </person-group>
          <article-title>Comparative effectiveness of convolutional neural network (CNN) and recurrent neural network (RNN) architectures for radiology text report classification</article-title>
          <source>Artif Intell Med</source>
          <year>2019</year>
          <month>06</month>
          <volume>97</volume>
          <fpage>79</fpage>
          <lpage>88</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/30477892"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.artmed.2018.11.004</pub-id>
          <pub-id pub-id-type="medline">30477892</pub-id>
          <pub-id pub-id-type="pii">S0933-3657(17)30625-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC6533167</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Ball</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Moradzadeh</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Chapman</surname>
              <given-names>BE</given-names>
            </name>
            <name name-style="western">
              <surname>Larson</surname>
              <given-names>DB</given-names>
            </name>
            <name name-style="western">
              <surname>Langlotz</surname>
              <given-names>CP</given-names>
            </name>
            <name name-style="western">
              <surname>Amrhein</surname>
              <given-names>TJ</given-names>
            </name>
            <name name-style="western">
              <surname>Lungren</surname>
              <given-names>MP</given-names>
            </name>
          </person-group>
          <article-title>Deep learning to classify radiology free-text reports</article-title>
          <source>Radiology</source>
          <year>2018</year>
          <month>03</month>
          <volume>286</volume>
          <issue>3</issue>
          <fpage>845</fpage>
          <lpage>852</lpage>
          <pub-id pub-id-type="doi">10.1148/radiol.2017171115</pub-id>
          <pub-id pub-id-type="medline">29135365</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Young</surname>
              <given-names>MT</given-names>
            </name>
            <name name-style="western">
              <surname>Qiu</surname>
              <given-names>JX</given-names>
            </name>
            <name name-style="western">
              <surname>Yoon</surname>
              <given-names>H-J</given-names>
            </name>
            <name name-style="western">
              <surname>Christian</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Fearn</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Tourassi</surname>
              <given-names>GD</given-names>
            </name>
            <name name-style="western">
              <surname>Ramanthan</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Hierarchical attention networks for information extraction from cancer pathology reports</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2018</year>
          <month>03</month>
          <day>01</day>
          <volume>25</volume>
          <issue>3</issue>
          <fpage>321</fpage>
          <lpage>330</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/29155996"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocx131</pub-id>
          <pub-id pub-id-type="medline">29155996</pub-id>
          <pub-id pub-id-type="pii">4636780</pub-id>
          <pub-id pub-id-type="pmcid">PMC7282502</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>GBD 2013 Mortality Causes of Death Collaborators</collab>
          </person-group>
          <article-title>Global, regional, and national age-sex specific all-cause and cause-specific mortality for 240 causes of death, 1990-2013: a systematic analysis for the Global Burden of Disease Study 2013</article-title>
          <source>Lancet</source>
          <year>2015</year>
          <month>01</month>
          <day>10</day>
          <volume>385</volume>
          <issue>9963</issue>
          <fpage>117</fpage>
          <lpage>171</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/25530442"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S0140-6736(14)61682-2</pub-id>
          <pub-id pub-id-type="medline">25530442</pub-id>
          <pub-id pub-id-type="pii">S0140-6736(14)61682-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC4340604</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>GBD 2016 Disease Injury Incidence Prevalence Collaborators</collab>
          </person-group>
          <article-title>Global, regional, and national incidence, prevalence, and years lived with disability for 328 diseases and injuries for 195 countries, 1990-2016: a systematic analysis for the Global Burden of Disease Study 2016</article-title>
          <source>Lancet</source>
          <year>2017</year>
          <month>09</month>
          <day>16</day>
          <volume>390</volume>
          <issue>10100</issue>
          <fpage>1211</fpage>
          <lpage>1259</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0140-6736(17)32154-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S0140-6736(17)32154-2</pub-id>
          <pub-id pub-id-type="medline">28919117</pub-id>
          <pub-id pub-id-type="pii">S0140-6736(17)32154-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC5605509</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Macias</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>McElhaney</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Chaves</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Nealon</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Nunes</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Samson</surname>
              <given-names>SI</given-names>
            </name>
            <name name-style="western">
              <surname>Seet</surname>
              <given-names>BT</given-names>
            </name>
            <name name-style="western">
              <surname>Weinke</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>The disease burden of influenza beyond respiratory illness</article-title>
          <source>Vaccine</source>
          <year>2021</year>
          <month>03</month>
          <day>15</day>
          <volume>39 Suppl 1</volume>
          <fpage>A6</fpage>
          <lpage>A14</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0264-410X(20)31209-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.vaccine.2020.09.048</pub-id>
          <pub-id pub-id-type="medline">33041103</pub-id>
          <pub-id pub-id-type="pii">S0264-410X(20)31209-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC7545338</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lample</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Ballesteros</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Subramanian</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kawakami</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Dyer</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Neural architectures for named entity recognition</article-title>
          <year>2016</year>
          <conf-name>Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</conf-name>
          <conf-date>June 12-17</conf-date>
          <conf-loc>San Diego, CA</conf-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/N16-1030"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/N16-1030</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Jernite</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Sontag</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Rush</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Character-aware neural language models</article-title>
          <source>Proceedings of the AAAI Conference on Artificial Intelligence</source>
          <year>2016</year>
          <month>03</month>
          <day>05</day>
          <volume>30</volume>
          <issue>1</issue>
          <pub-id pub-id-type="doi">10.1609/aaai.v30i1.10362</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pennington</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Socher</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Manning</surname>
              <given-names>CD</given-names>
            </name>
          </person-group>
          <article-title>GloVe: Global vectors for word representation</article-title>
          <year>2014</year>
          <conf-name>Conference on Empirical Methods in Natural Language Processing (EMNLP)</conf-name>
          <conf-date>October 25-29</conf-date>
          <conf-loc>Doha, Qatar</conf-loc>
          <pub-id pub-id-type="doi">10.3115/v1/D14-1162</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Ren</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Qiu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>Q</given-names>
            </name>
          </person-group>
          <article-title>Extracting comprehensive clinical information for breast cancer using deep learning methods</article-title>
          <source>Int J Med Inform</source>
          <year>2019</year>
          <month>12</month>
          <volume>132</volume>
          <fpage>103985</fpage>
          <pub-id pub-id-type="doi">10.1016/j.ijmedinf.2019.103985</pub-id>
          <pub-id pub-id-type="medline">31627032</pub-id>
          <pub-id pub-id-type="pii">S1386-5056(19)31006-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Savova</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Danciu</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Alamudun</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Bitterman</surname>
              <given-names>DS</given-names>
            </name>
            <name name-style="western">
              <surname>Tourassi</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Warner</surname>
              <given-names>JL</given-names>
            </name>
          </person-group>
          <article-title>Use of natural language processing to extract clinical cancer phenotypes from electronic medical records</article-title>
          <source>Cancer Res</source>
          <year>2019</year>
          <month>11</month>
          <day>01</day>
          <volume>79</volume>
          <issue>21</issue>
          <fpage>5463</fpage>
          <lpage>5470</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/31395609"/>
          </comment>
          <pub-id pub-id-type="doi">10.1158/0008-5472.CAN-19-0579</pub-id>
          <pub-id pub-id-type="medline">31395609</pub-id>
          <pub-id pub-id-type="pii">0008-5472.CAN-19-0579</pub-id>
          <pub-id pub-id-type="pmcid">PMC7227798</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>López Pineda</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Visweswaran</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cooper</surname>
              <given-names>GF</given-names>
            </name>
            <name name-style="western">
              <surname>Wagner</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Tsui</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Comparison of machine learning classifiers for influenza detection from emergency department free-text reports</article-title>
          <source>J Biomed Inform</source>
          <year>2015</year>
          <month>12</month>
          <volume>58</volume>
          <fpage>60</fpage>
          <lpage>69</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(15)00187-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2015.08.019</pub-id>
          <pub-id pub-id-type="medline">26385375</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(15)00187-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC4684714</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tsui</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Wagner</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Espino</surname>
              <given-names>JU</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Q</given-names>
            </name>
          </person-group>
          <article-title>Influenza detection from emergency department reports using natural language processing and Bayesian network classifiers</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2014</year>
          <month>09</month>
          <day>01</day>
          <volume>21</volume>
          <issue>5</issue>
          <fpage>815</fpage>
          <lpage>823</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/24406261"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2013-001934</pub-id>
          <pub-id pub-id-type="medline">24406261</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2013-001934</pub-id>
          <pub-id pub-id-type="pmcid">PMC4147621</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>López Pineda</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Tsui</surname>
              <given-names>FC</given-names>
            </name>
            <name name-style="western">
              <surname>Visweswaran</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cooper</surname>
              <given-names>GF</given-names>
            </name>
          </person-group>
          <article-title>Detection of patients with influenza syndrome using machine-learning models learned from emergency department reports</article-title>
          <source>Online J Public Health Inform</source>
          <year>2013</year>
          <month>04</month>
          <day>04</day>
          <volume>5</volume>
          <issue>1</issue>
          <fpage>41</fpage>
          <lpage>41</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3692886/"/>
          </comment>
          <pub-id pub-id-type="doi">10.5210/ojphi.v5i1.4446</pub-id>
          <pub-id pub-id-type="pmcid">PMC3692886</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
