<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.0" xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR AI</journal-id>
      <journal-title>JMIR AI</journal-title>
      <issn pub-type="epub">2817-1705</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v3i1e56590</article-id>
      <article-id pub-id-type="pmid">39259582</article-id>
      <article-id pub-id-type="doi">10.2196/56590</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Development of Lung Cancer Risk Prediction Machine Learning Models for Equitable Learning Health System: Retrospective Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>El Emam</surname>
            <given-names>Khaled</given-names>
          </name>
        </contrib>
        <contrib contrib-type="editor">
          <name>
            <surname>Malin</surname>
            <given-names>Bradley</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Jani</surname>
            <given-names>Mehul</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Green</surname>
            <given-names>Brian</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>Anjun</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4209-8301</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Wu</surname>
            <given-names>Erman</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2363-3738</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Huang</surname>
            <given-names>Ran</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6421-3361</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Shen</surname>
            <given-names>Bairong</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2899-1531</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Han</surname>
            <given-names>Ruobing</given-names>
          </name>
          <degrees>MA</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5932-9829</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Wen</surname>
            <given-names>Jian</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6370-3964</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>Zhiyong</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3112-3807</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Li</surname>
            <given-names>Qinghua</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <address>
            <institution>Department of Neurology</institution>
            <institution>Guilin Medical University Affiliated Hospital</institution>
            <addr-line>15 Lequn Road</addr-line>
            <addr-line>Guilin, Guangxi, 541000</addr-line>
            <country>China</country>
            <phone>86 15878361508</phone>
            <email>qhli1999@glmc.edu.cn</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4547-8513</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>School of Public Health</institution>
        <institution>Guilin Medical University</institution>
        <addr-line>Guilin</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>West China Hospital</institution>
        <addr-line>Chengdu</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Guilin Medical University</institution>
        <addr-line>Guilin</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Department of Neurology</institution>
        <institution>Guilin Medical University Affiliated Hospital</institution>
        <addr-line>Guilin, Guangxi</addr-line>
        <country>China</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Qinghua Li <email>qhli1999@glmc.edu.cn</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2024</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>11</day>
        <month>9</month>
        <year>2024</year>
      </pub-date>
      <volume>3</volume>
      <elocation-id>e56590</elocation-id>
      <history>
        <date date-type="received">
          <day>21</day>
          <month>1</month>
          <year>2024</year>
        </date>
        <date date-type="rev-request">
          <day>30</day>
          <month>3</month>
          <year>2024</year>
        </date>
        <date date-type="rev-recd">
          <day>2</day>
          <month>4</month>
          <year>2024</year>
        </date>
        <date date-type="accepted">
          <day>1</day>
          <month>5</month>
          <year>2024</year>
        </date>
      </history>
      <copyright-statement>©Anjun Chen, Erman Wu, Ran Huang, Bairong Shen, Ruobing Han, Jian Wen, Zhiyong Zhang, Qinghua Li. Originally published in JMIR AI (https://ai.jmir.org), 11.09.2024.</copyright-statement>
      <copyright-year>2024</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR AI, is properly cited. The complete bibliographic information, a link to the original publication on https://www.ai.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://ai.jmir.org/2024/1/e56590" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>A significant proportion of young at-risk patients and nonsmokers are excluded by the current guidelines for lung cancer (LC) screening, resulting in low-screening adoption. The vision of the US National Academy of Medicine to transform health systems into learning health systems (LHS) holds promise for bringing necessary structural changes to health care, thereby addressing the exclusivity and adoption issues of LC screening.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aims to realize the LHS vision by designing an equitable, machine learning (ML)–enabled LHS unit for LC screening. It focuses on developing an inclusive and practical LC risk prediction model, suitable for initializing the ML-enabled LHS (ML-LHS) unit. This model aims to empower primary physicians in a clinical research network, linking central hospitals and rural clinics, to routinely deliver risk-based screening for enhancing LC early detection in broader populations.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We created a standardized data set of health factors from 1397 patients with LC and 1448 control patients, all aged 30 years and older, including both smokers and nonsmokers, from a hospital’s electronic medical record system. Initially, a data-centric ML approach was used to create inclusive ML models for risk prediction from all available health factors. Subsequently, a quantitative distribution of LC health factors was used in feature engineering to refine the models into a more practical model with fewer variables.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The initial inclusive 250-variable XGBoost model for LC risk prediction achieved performance metrics of 0.86 recall, 0.90 precision, and 0.89 accuracy. Post feature refinement, a practical 29-variable XGBoost model was developed, displaying performance metrics of 0.80 recall, 0.82 precision, and 0.82 accuracy. This model met the criteria for initializing the ML-LHS unit for risk-based, inclusive LC screening within clinical research networks.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>This study designed an innovative ML-LHS unit for a clinical research network, aiming to sustainably provide inclusive LC screening to all at-risk populations. It developed an inclusive and practical XGBoost model from hospital electronic medical record data, capable of initializing such an ML-LHS unit for community and rural clinics. The anticipated deployment of this ML-LHS unit is expected to significantly improve LC-screening rates and early detection among broader populations, including those typically overlooked by existing screening guidelines.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>lung cancer</kwd>
        <kwd>risk prediction</kwd>
        <kwd>early detection</kwd>
        <kwd>learning health system</kwd>
        <kwd>LHS</kwd>
        <kwd>machine learning</kwd>
        <kwd>ML</kwd>
        <kwd>artificial intelligence</kwd>
        <kwd>AI</kwd>
        <kwd>predictive model</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Lung Cancer–Screening Challenges</title>
        <p>Lung cancer (LC) is the second most common cancer and the leading cause of cancer deaths worldwide [<xref ref-type="bibr" rid="ref1">1</xref>]. It accounted for an estimated 2.2 million new cases and 1.8 million deaths in 2020. Screening for early detection of LC is a crucial strategy to combat this deadly disease [<xref ref-type="bibr" rid="ref2">2</xref>]. LC-screening guidelines recommend that heavy smokers aged 50-80 years undergo LC screening [<xref ref-type="bibr" rid="ref3">3</xref>]. Clinical trials have shown about a 20% reduction in LC mortality due to screening with low-dose computed tomography [<xref ref-type="bibr" rid="ref4">4</xref>].</p>
        <p>However, nonsmoking adults and individuals younger than 50 years are often excluded from LC-screening guidelines, despite representing a significant percentage of patients with LC worldwide [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>]. Statistical risk prediction models, such as PLCOm2012, have been used to recommend LC screening for smokers [<xref ref-type="bibr" rid="ref7">7</xref>]. The subsequent PLCOall2014 model included nonsmokers in risk evaluation [<xref ref-type="bibr" rid="ref8">8</xref>], but its impact on screening uptake was unclear. In addition, the adoption of LC screening is low; for instance, only about 5% of the at-risk population in the United States has undergone LC screening [<xref ref-type="bibr" rid="ref9">9</xref>].</p>
        <p>There have been numerous research efforts to overcome these challenges, but their results were inconclusive and unsatisfactory [<xref ref-type="bibr" rid="ref10">10</xref>]. Researchers have proposed individualized risk-based screening approaches for both smokers and nonsmokers [<xref ref-type="bibr" rid="ref11">11</xref>]. In 2018, the PLCO model developer reviewed several traditional risk prediction models and suggested that including biomarkers might help identify individuals who could benefit from LC screening [<xref ref-type="bibr" rid="ref12">12</xref>]. The PanCan study demonstrated that selecting participants for LC screening based on risk modeling could identify patients with early-stage LC [<xref ref-type="bibr" rid="ref13">13</xref>]. A recent systematic review concluded that further research is needed to optimize risk-based LC screening [<xref ref-type="bibr" rid="ref14">14</xref>]. Concurrently, an updated evidence report for the US Preventive Services Task Force indicated that screening high-risk individuals with low-dose computed tomography could reduce LC mortality but might also lead to false positives, resulting in unnecessary tests and invasive procedures [<xref ref-type="bibr" rid="ref15">15</xref>].</p>
        <p>As electronic medical records (EMRs) become prevalent in hospitals, several machine learning (ML) models have been developed using EMR data for LC risk prediction. Kaiser researchers used a small set of preselected variables to identify patients with early-stage LC from routine clinical and laboratory data [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>]. Stanford researchers developed an ML model to predict the 1-year risk of incident LC using more than 33,000 features from EMR data [<xref ref-type="bibr" rid="ref18">18</xref>]. Deep learning with convolutional neural networks applied to EMR data from 2 million patients produced a high-performance LC risk prediction model [<xref ref-type="bibr" rid="ref19">19</xref>]. However, the widespread deployment of these models for risk-based LC screening is yet to be determined.</p>
      </sec>
      <sec>
        <title>The Learning Health System Approach</title>
        <p>Over a decade ago, the US National Academy of Medicine (NAM) identified some major shortcomings in the current clinical evidence generation enterprise and proposed the vision of learning health systems (LHS) to address these issues [<xref ref-type="bibr" rid="ref20">20</xref>-<xref ref-type="bibr" rid="ref22">22</xref>]. First, many guidelines are primarily based on clinical trials with narrow scopes, failing to fully represent real-world scenarios. For instance, the exclusion of nonsmokers and younger populations from the LC guidelines might be a result of these narrow scopes. Second, the slow dissemination of evidence from discovery to clinical practice contributes to the low adoption rate of LC screening. To address these significant challenges, NAM envisions transforming health systems into LHS to bring necessary structural changes to health care. One of the most significant system-level changes in LHS is the embedding of clinical research into routine clinical delivery, facilitating more efficient generation of real-world evidence from real-world data (RWD) of patients and faster dissemination of new evidence to practices. Efficient evidence generation also necessitates innovations in clinical trial methodologies, such as pragmatic clinical trials [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref24">24</xref>].</p>
        <p>We believe that NAM’s LHS vision points in the right direction to address the exclusivity, bias, and adoption issues of LC screening. In pursuing sustainable, long-term solutions for inclusive screening and increased screening rates, we believe that system-level innovations are essential. We have focused on two interdependent considerations: (1) more inclusive intervention: exploring data-centric, risk-based LC-screening recommendations instead of blunt exclusions of certain demographic groups; and (2) broader access to the intervention: applying ML-based artificial intelligence (AI) to enable doctors in community and rural primary care to conduct routine risk-based LC screening. Our goal is to assess whether identifying at-risk individuals anywhere using the LHS approach can help close the gap in LC-screening disparities.</p>
        <p>These considerations necessitate at least two innovations: (1) a new ML-enabled LHS unit that can continuously improve ML models and thus enhance risk prediction services. Our first ML-enabled LHS (ML-LHS) simulation study using synthetic patient data demonstrated performance improvement of LC risk prediction ML models over time [<xref ref-type="bibr" rid="ref25">25</xref>]. (2) ML models that are inclusive in terms of patient populations and practical for use in low-resource clinics. Previously, by applying a data-centric EMR ML approach and feature engineering based on a quantitative distribution of health factors derived from EMR data [<xref ref-type="bibr" rid="ref26">26</xref>], we successfully developed an inclusive and practical ML model for predicting the risk of nasopharyngeal cancer [<xref ref-type="bibr" rid="ref27">27</xref>].</p>
      </sec>
      <sec>
        <title>Aims</title>
        <p>This study aimed to design an equitable ML-LHS unit for LC screening and to develop an inclusive and practical LC risk prediction model suitable for initializing the LC-screening ML-LHS unit. The future deployment of this new LC ML-LHS unit will aid in implementing risk-based LC screening across populations broader than those currently covered by existing LC-screening guidelines, thereby improving both patient coverage and LC-screening rate.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Hybrid EMR ML Pipeline for Inclusive and Practical LC ML Model</title>
        <p>We designed a hybrid EMR ML pipeline to create an inclusive and practical ML model for LC risk prediction (see <xref rid="figure1" ref-type="fig">Figure 1</xref>). In step 1, data related to all health factors associated with LC are collected from the EMR. Common ML algorithms, such as XGBoost, are then used to train risk prediction models using these data. In step 2, a patient graph is constructed using all health factors in the EMR, which produces a quantitative LC health factor distribution. In step 3, feature engineering, based on the health factor distribution, refines the model into a more practical one with fewer variables. The recently published patient graph analysis method is used to generate this quantitative distribution of health factors from hospital EMR data [<xref ref-type="bibr" rid="ref26">26</xref>].</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Hybrid EMR ML pipeline for developing inclusive and practical machine learning models for lung cancer risk prediction. The inclusive ML model uses as many health factor variables from EMR as possible. In contrast, the practical ML model uses a small number of variables that are readily available in low-resource clinics. The quantitative distribution of health factors, derived from real-world patient data, aids in refining the features of the inclusive model to formulate the practical model. EMR: electronic medical record; ML: machine learning.</p>
          </caption>
          <graphic xlink:href="ai_v3i1e56590_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Standardized Patient Data Collection</title>
        <p>Deidentified patient medical records were generated from the hospital’s EMR and relevant databases, covering the period from January 2018 to June 2021. These data sets were securely stored on a data server managed by the hospital’s informatics department. The data set encompassed about 1 million patients and 7 million outpatient and inpatient encounters. The records excluded all fields containing personal information, such as patient names, birth dates, personal IDs, contact details, and addresses. Original hospital identifiers for patients and encounters were replaced by random numbers, not linked to the patients.</p>
        <p>Due to the absence of applicable codes for diagnoses in the EMR, Chinese synonyms for LC were used to identify patients with LC. The targeted data set included 1397 patients with LC aged 30 years and older. In addition, 1448 patients aged 30 years and older with no LC were randomly selected to form the background or control data set. We maintained similar numbers of patients in the target and control groups to preserve class balance. However, data standardization, being time-consuming, limited the number of patients in the final structured data set. Based on our experience in building multiple models from EMR, the minimal number is approximately 1000 target patients and 1000 background patients.</p>
        <p>Deidentified records of outpatient and inpatient visits, diagnoses, laboratory tests, and procedures were imported into a custom data collection tool on the data server. This tool automatically extracted laboratory test data for storage in a MongoDB database, provided by MongoDB Inc. Our researchers manually curated data from patient record texts and entered them into the database. Data were categorized into 9 categories: disease and condition, symptom, medical history, observation, laboratory test, procedure, medication, treatment, and other risk factors. To overcome the lack of coding and standardization in the records, practical rules were established to ensure consistency in data collection. Synonyms were automatically converted to local “standard terms” with corresponding local codes, culminating in local “standard data.” For each patient with LC, only those data leading to the final diagnosis of LC were collected, forming a patient diagnosis journey (PDJ) object comprising 1 or multiple encounters. For each background patient, all encounters within the 3.5-year period were included. When exporting PDJ data to a comma-separated values file for analysis, only the most recent data for each health factor in the PDJ were selected.</p>
      </sec>
      <sec>
        <title>EMR ML for Inclusive LC Risk Prediction Models</title>
        <p>All continuous numeric data in the profiles were converted to categorical data. For example, age ranges were established as 30-50, 50-70, and more than 70 years; drinking levels were categorized as 0-2 and &gt;2 drinks per day; and smoking levels were divided into 0, 1-20, and &gt;20 cigarettes per day. Laboratory test results had predefined categories such as normal or abnormal, true or false, positive or negative, and high, medium, or low. After this conversion, profiles of patients with LC encompassed more than 58,000 data items and 2066 codes, while background patient profiles comprised more than 46,000 data items and 1298 codes. Subsequently, the profile data were structured into a horizontal table for ML, labeling patients with LC as “1” and background patients as “0.”</p>
        <p>Codes were organized based on the number of associated patients with LC. Various sets of codes, exceeding a cutoff of 10 patients with LC, were selected by different criteria for ML. For the LC risk prediction study, all codes related to cancer diseases, procedures, medications, and treatments were omitted. In addition, diagnostic imaging procedures commonly used for patients with cancer but not for background patients were also excluded.</p>
        <p>In developing ML models, we used the XGBoost Python library [<xref ref-type="bibr" rid="ref28">28</xref>]. XGBoost is known for parallel tree boosting and its efficient management of missing data. The Python library scikit-learn from Scikit-learn.org was used for all other ML tasks [<xref ref-type="bibr" rid="ref29">29</xref>]. The free Jupyter Notebook tool was used to conduct ML experiments [<xref ref-type="bibr" rid="ref30">30</xref>]. The Pandas library was used for reading and writing comma-separated values files and manipulating data tables. The data set was divided into training (60%), tuning (20%), and validation (20%) subsets. Using the default hyperparameters, the XGBoost classifier was fitted with the training and tuning sets, and the resulting model was independently validated by the validation data set [<xref ref-type="bibr" rid="ref31">31</xref>]. The model’s effectiveness in risk prediction was evaluated using key metrics such as recall, precision, area under the receiver operating characteristic curve (AUROC), and accuracy. Receiver operating characteristic (ROC) curve and reliability (or calibration) curve were drawn by calling the corresponding Scikit-learn functions.</p>
        <p>By comparing the performances of models built from different variable sets, an inclusive variable set was established. Using this set, XGBoost was compared with 3 other commonly used algorithms: random forest (RF), support vector machines (SVM), and k-nearest neighbors (KNN). These algorithms were executed using Scikit-learn classifiers with default parameters. The main reason for evaluating only the common algorithms is that they are promising in delivering the initial acceptable performance required by our LHS design, and their deployment is easier and cost-efficient. Only if this test fails will we test more complex algorithms like neural networks.</p>
      </sec>
      <sec>
        <title>Building Practical ML Prediction Models</title>
        <p>In the final refinement step of our hybrid ML pipeline, a quantitative distribution of LC health factors was generated directly from the same EMR data through patient graph analysis [<xref ref-type="bibr" rid="ref32">32</xref>]. In the patient graph, health factors are connected to patients with LC and background patients with no LC. The difference in the number of connections to patients with LC versus patients with no LC, called the “connection delta ratio” (CDR), was calculated for each health factor. Sorting the health factors by CDR in descending order provided a quantitative distribution of the health factors. Most of the top health factors with a CDR above a threshold were verified as LC risk factors or were correlated with LC in a literature review. This distribution laid the groundwork for grouping risk factors, selecting only 1 representative factor from each group for the ML model. For instance, pains at different body sites were combined into a single “pain” factor. Data for each variable group were also consolidated, considering the representative variable for the group as true if any of the variables in the group was true.</p>
        <p>The following criteria were applied to select a small number of variables for the practical variable set: (1) ensuring that the number of essential variables remained fewer than 30 while achieving key prediction performance metrics (recall, precision, and accuracy) above 80%; (2) using consolidated variables based on the risk factor distribution wherever feasible; (3) minimizing the number of required laboratory tests; and (4) using imaging observations obtainable through simple chest radiographs. The rationale for these empirical criteria is to make the deployment and adoption of the model more practical in low-resource clinical settings, where data for only a small number of variables may be available. However, the LHS starting model should strike a balance between a minimal number of variables and acceptable performance metrics. We tested and compared feature selections using XGBoost. After determining a practical set, we ran RF, SVM, and KNN algorithms for comparison. All models were trained and evaluated using the default parameters of the classifiers. The XGBoost base model used the following default hyperparameters: scale_pos_weight = 1, n_estimators = 500, max_depth = 6, eta = 0.3, gamma = 0, reg_lambda = 1.0, early_stopping_rounds = 5, and eval_metric = 'logloss'.</p>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>This retrospective study of EMR patient data received approval from the Institutional Review Board of Guilin Medical University Affiliated Hospital (number QTLL202139). Prior to data usage, our research team underwent training in patient data security and privacy policy of the hospital.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Design of ML-LHS Unit for LC Screening</title>
        <p>To improve patient inclusivity and adoption in LC screening, we designed a novel ML-enabled LHS unit for LC screening within a clinical research network (CRN). The CRN is led by a central hospital and includes numerous participating clinics in surrounding communities and rural areas. The central hospital is tasked with developing an inclusive and practical LC risk prediction ML model to initialize the LHS unit and providing an AI tool online for clinic use. Primary physicians in these clinics are responsible for routinely using the AI tool to assess LC risk in all patient populations in the CRN. At-risk patients are recommended for LC screening. The hospital also continuously updates models with new patient data, validates models, and deploys improved models for predictive services.</p>
      </sec>
      <sec>
        <title>Inclusive LC Risk Prediction ML Models</title>
        <p>A total of 2845 patients, comprising 1397 patients with LC and 1448 patients with no LC, were selected from the EMR of a Chinese hospital. The cohort consisted of 60.8% (1731/2845) men and 39.2% (1114/2845) women. Agewise, 19.6% (557/2845) of patients were between 30 and 50 years of age, 58.1% (1654/2845) were between 50 and 70 years of age, and 22.0% (625/2845) were older than 70 years. Within the patient group with LC, 19.8% (277/1397) had a history of smoking, while 80.2% (1120/1397) did not. Since the data set includes a significant number of patients outside the typical LC-screening guideline–recommended demographic, which usually targets heavy smokers aged 50-80 years, the resulting LC risk prediction models were more inclusive, encompassing a broader patient population aged 30 years and older, regardless of smoking status.</p>
        <p>To develop an LC risk prediction XGBoost model with default settings, we compared different sets of top-ranked health factors (including diseases, symptoms, medical histories, laboratory tests, observations, and other risk factors) from a list of more than 2000 factors, sorted by each factor’s prevalence in patients with LC. As the number of variables exceeded 200, key model performance metrics plateaued, reaching 0.85 for recall, 0.90 for precision, 0.88 for AUROC, and 0.88 for accuracy (<xref ref-type="table" rid="table1">Table 1</xref> and <xref rid="figure2" ref-type="fig">Figure 2</xref>). Consequently, a set of 250 variables was selected as the inclusive variable set (denoted as “iv250”).</p>
        <p>Using the iv250 set and default parameters, we compared XGBoost with other common algorithms such as RF, SVM, and KNN. <xref ref-type="table" rid="table2">Table 2</xref> demonstrates that XGBoost and SVM achieved similarly high performance levels, with 0.86 for recall, 0.90 for precision, 0.89 for AUROC, and 0.89 for accuracy. The ROC curve and the reliability curve of the iv250 XGBoost model are shown in <xref rid="figure3" ref-type="fig">Figure 3</xref>.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Performance metrics of the XGBoost lung cancer risk prediction models with different numbers of variables.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="150"/>
            <col width="80"/>
            <col width="90"/>
            <col width="80"/>
            <col width="90"/>
            <col width="80"/>
            <col width="90"/>
            <col width="80"/>
            <col width="90"/>
            <col width="80"/>
            <col width="90"/>
            <thead>
              <tr valign="top">
                <td>Metrics<sup>a</sup></td>
                <td colspan="10">Number of variables</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>10</td>
                <td>20</td>
                <td>30</td>
                <td>40</td>
                <td>50</td>
                <td>100</td>
                <td>150</td>
                <td>200</td>
                <td>250</td>
                <td>300</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Recall</td>
                <td>0.734</td>
                <td>0.755</td>
                <td>0.794</td>
                <td>0.794</td>
                <td>0.801</td>
                <td>0.816</td>
                <td>0.837</td>
                <td>0.858</td>
                <td>0.862</td>
                <td>0.887</td>
              </tr>
              <tr valign="top">
                <td>Precision</td>
                <td>0.802</td>
                <td>0.849</td>
                <td>0.830</td>
                <td>0.842</td>
                <td>0.856</td>
                <td>0.858</td>
                <td>0.904</td>
                <td>0.903</td>
                <td>0.914</td>
                <td>0.890</td>
              </tr>
              <tr valign="top">
                <td>AUROC<sup>b</sup></td>
                <td>0.778</td>
                <td>0.811</td>
                <td>0.817</td>
                <td>0.824</td>
                <td>0.835</td>
                <td>0.842</td>
                <td>0.875</td>
                <td>0.884</td>
                <td>0.891</td>
                <td>0.889</td>
              </tr>
              <tr valign="top">
                <td>Accuracy</td>
                <td>0.779</td>
                <td>0.812</td>
                <td>0.817</td>
                <td>0.824</td>
                <td>0.835</td>
                <td>0.842</td>
                <td>0.875</td>
                <td>0.884</td>
                <td>0.891</td>
                <td>0.889</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>The XGBoost machine learning base models were configured with default settings.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>AUROC: area under the receiver operating characteristic curve.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Trends in performance metrics of XGBoost lung cancer risk prediction models with varying numbers of variables. Base models were trained using default settings. ROC-AUC: area under the receiver operating characteristic curve.</p>
          </caption>
          <graphic xlink:href="ai_v3i1e56590_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Comparison of machine learning model performance using different algorithms for lung cancer risk prediction with default parameters<sup>a</sup>.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="160"/>
            <col width="170"/>
            <col width="180"/>
            <col width="250"/>
            <col width="210"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Algorithm</td>
                <td>XGBoost</td>
                <td>Random forest</td>
                <td>Support vector machines</td>
                <td>K-nearest neighbors</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="6">
                  <bold>The inclusive 250-variable set (iv250)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Recall</td>
                <td>0.862</td>
                <td>0.872</td>
                <td>0.887</td>
                <td>0.667</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Precision</td>
                <td>0.914</td>
                <td>0.875</td>
                <td>0.909</td>
                <td>0.715</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>AUROC<sup>b</sup></td>
                <td>0.891</td>
                <td>0.875</td>
                <td>0.900</td>
                <td>0.703</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Accuracy</td>
                <td>0.891</td>
                <td>0.875</td>
                <td>0.900</td>
                <td>0.703</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>The inclusive and practical 29-variable set (pv29)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Recall</td>
                <td>0.805</td>
                <td>0.816</td>
                <td>0.748</td>
                <td>0.649</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Precision</td>
                <td>0.825</td>
                <td>0.830</td>
                <td>0.858</td>
                <td>0.832</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>AUROC</td>
                <td>0.819</td>
                <td>0.826</td>
                <td>0.813</td>
                <td>0.760</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Accuracy</td>
                <td>0.819</td>
                <td>0.826</td>
                <td>0.814</td>
                <td>0.761</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>All machine learning base models used default settings.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>AUROC: area under the receiver operating characteristic curve.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>ROC and reliability curves of XGBoost models for lung cancer risk prediction. Models were trained with the default settings. (A) ROC curve for the inclusive model using 250 variables (iv250). (B) Reliability curve for iv250. (C) ROC curve for the practical model using 29 variables (pv29). (D) Reliability curve for pv29. ROC: receiver operating characteristic.</p>
          </caption>
          <graphic xlink:href="ai_v3i1e56590_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Practical LC Risk Prediction ML Models</title>
        <p>For practical application in clinics, the models underwent further refinement through feature engineering based on the quantitative distribution of LC health factors. This refinement led to the development of a concise and practical set of 29 variables, termed “pv29.” <xref ref-type="table" rid="table3">Table 3</xref> presents the details of the pv29 variables.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>List of the 29 variables used in the inclusive and practical machine learning models for lung cancer risk prediction.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="410"/>
            <col width="340"/>
            <col width="250"/>
            <thead>
              <tr valign="top">
                <td>Category</td>
                <td>Local code</td>
                <td>Health factor term</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Disease</td>
                <td>C-572430</td>
                <td>Emphysema</td>
              </tr>
              <tr valign="top">
                <td>Disease</td>
                <td>C-654730</td>
                <td>Lung inflammation</td>
              </tr>
              <tr valign="top">
                <td>Disease</td>
                <td>C-897420</td>
                <td>Bronchitis</td>
              </tr>
              <tr valign="top">
                <td>History</td>
                <td>C-902187</td>
                <td>Smoking history</td>
              </tr>
              <tr valign="top">
                <td>Laboratory test</td>
                <td>C-602395</td>
                <td>Albumin/globulin ratio</td>
              </tr>
              <tr valign="top">
                <td>Laboratory test</td>
                <td>C-320164</td>
                <td>Hematocrit</td>
              </tr>
              <tr valign="top">
                <td>Laboratory test</td>
                <td>C-952408</td>
                <td>Non–small cell lung cancer–associated antigen</td>
              </tr>
              <tr valign="top">
                <td>Laboratory test</td>
                <td>C-023789</td>
                <td>Carcinoembryonic antigen</td>
              </tr>
              <tr valign="top">
                <td>Laboratory test</td>
                <td>C-945807</td>
                <td>Fibrinogen</td>
              </tr>
              <tr valign="top">
                <td>Laboratory test</td>
                <td>C-609483</td>
                <td>Lymphocyte ratio</td>
              </tr>
              <tr valign="top">
                <td>Laboratory test</td>
                <td>C-346250</td>
                <td>Platelet distribution width</td>
              </tr>
              <tr valign="top">
                <td>Laboratory test</td>
                <td>C-965710</td>
                <td>Hemoglobin concentration</td>
              </tr>
              <tr valign="top">
                <td>Laboratory test</td>
                <td>C-546207</td>
                <td>Globulin</td>
              </tr>
              <tr valign="top">
                <td>Laboratory test</td>
                <td>C-015328</td>
                <td>Alkaline phosphatase</td>
              </tr>
              <tr valign="top">
                <td>Laboratory test</td>
                <td>C-963520</td>
                <td>High-sensitivity C-reactive protein</td>
              </tr>
              <tr valign="top">
                <td>Laboratory test</td>
                <td>C-573086</td>
                <td>Neuron-specific enolase</td>
              </tr>
              <tr valign="top">
                <td>Laboratory test</td>
                <td>C-284309</td>
                <td>Carbohydrate antigen 153</td>
              </tr>
              <tr valign="top">
                <td>Laboratory test</td>
                <td>C-507246</td>
                <td>Urine protein</td>
              </tr>
              <tr valign="top">
                <td>Observation</td>
                <td>C-598214</td>
                <td>Lung nodules</td>
              </tr>
              <tr valign="top">
                <td>Observation</td>
                <td>C-825049</td>
                <td>Pleural effusion</td>
              </tr>
              <tr valign="top">
                <td>Observation</td>
                <td>C-567942</td>
                <td>Atelectasis</td>
              </tr>
              <tr valign="top">
                <td>Risk factor</td>
                <td>C-504168</td>
                <td>Gender</td>
              </tr>
              <tr valign="top">
                <td>Risk factor</td>
                <td>C-928456</td>
                <td>Age</td>
              </tr>
              <tr valign="top">
                <td>Symptom</td>
                <td>C-546879</td>
                <td>Cough</td>
              </tr>
              <tr valign="top">
                <td>Symptom</td>
                <td>C-984012</td>
                <td>Chest pain</td>
              </tr>
              <tr valign="top">
                <td>Symptom</td>
                <td>C-943817</td>
                <td>Shortness of breath</td>
              </tr>
              <tr valign="top">
                <td>Symptom</td>
                <td>C-152064</td>
                <td>Coughing up blood</td>
              </tr>
              <tr valign="top">
                <td>Symptom</td>
                <td>C-275809</td>
                <td>Chest tightness</td>
              </tr>
              <tr valign="top">
                <td>Symptom</td>
                <td>C-549780</td>
                <td>Pain</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <p><xref ref-type="table" rid="table2">Table 2</xref> compares the key performance metrics of the base models (XGBoost, RF, SVM, and KNN) using the pv29 variable set with default settings. The pv29 XGBoost and RF models demonstrated comparable performance, achieving 0.80 recall, 0.82 precision, 0.82 AUROC, and 0.82 accuracy. <xref rid="figure3" ref-type="fig">Figure 3</xref> illustrates the ROC and reliability curves of the pv29 XGBoost model. Considering other requirements, including dealing with sparse data in EMRs and compute time, the pv29 XGBoost model was selected as the initial model for the LC risk prediction in initialization of the ML-LHS unit, aimed at the future implementation of risk-based LC-screening recommendations in broader populations.</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>This study introduces a novel ML-LHS unit approach, aiming to offer sustainable and inclusive LC-screening solutions for all at-risk populations in both urban and rural areas within a CRN. To initiate this LC ML-LHS unit, we developed an inclusive and practical XGBoost model for LC risk prediction using hospital EMR data. This enables risk-based LC screening in broader patient populations aged 30 years and older, regardless of smoking status. Using 29 variables, accessible even in low-resource clinics, the ML model achieved LC risk prediction with performance metrics of 0.80 recall, 0.82 precision, 0.82 AUROC, and 0.82 accuracy. Because most of the 29 variables were verified as risk factors or correlated factors for LC in the literature, these model outputs are highly plausible. If an end user provides values for the 29 variables to the XGBoost model, the model will return a probability (0%-100%) of LC risk. More than 50% indicates a high risk of having LC, while below 50% indicates a low risk.</p>
      </sec>
      <sec>
        <title>Future Direction: Implementing LC ML-LHS CRN</title>
        <p>Considering the challenges in LC screening, such as low screening adoption and inadequate coverage for nonsmokers and younger patients, exploring risk-based screening strategies is vital [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref33">33</xref>-<xref ref-type="bibr" rid="ref35">35</xref>]. Following the present study, a future direction involves externally validating the LC risk prediction model. If validated, we plan to deploy the LC ML-LHS unit across a CRN, which will continuously monitor, rebuild the model, validate the new model, and deploy the improved model in so-called “LHS learning cycles.” Once operational, this innovative LHS unit could improve LC-screening rates and early detection in hospitals, community clinics, and rural areas.</p>
        <p>Moreover, the ML-LHS CRN is well suited to screen for rare genetic mutations associated with LC, such as the ROS-1 mutation. If certain mutations are identified, personalized and precision medicine may be recommended by a doctor to the patient. Since the pv29 LC model does not contain the genetic mutations as variables, the LHS would need to integrate a large language model (LLM) into the prediction module for the treatment prediction task. The top general-purpose LLMs, such as OpenAI’s ChatGPT 4 and Google Gemini 1.5, have shown high accuracy in making medical predictions in our and many other studies without requiring structured data input [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>]. Enhancing AI applicability through cooperation between structured data ML models and natural language LLMs presents an exciting future research direction.</p>
        <p>Furthermore, screening is just the beginning of a patient’s diagnostic journey in an equitable LHS. Future research should also investigate how AI, particularly generative AI, and LHS can effectively follow up with high-risk patients, educate patients for shared decision-making, and remind patients to undergo diagnostic tests in time for early detection of LC. Simultaneously, LHS will coordinate primary care physicians and specialists to provide the appropriate diagnostic tests, such as image tests (computed tomography, positron emission tomography–computed tomography, and magnetic resonance imaging), pathology tests, and biopsies for final diagnosis. Future studies should also determine when to recommend molecular and genetic testing for achieving personalized and precision treatment.</p>
      </sec>
      <sec>
        <title>Future Direction: Applying the ML-LHS Approach to Other Diseases</title>
        <p>The vision of NAM’s LHS emphasizes using RWD to generate real-world evidence. As EMRs are a primary source of RWD, they can be used to develop inclusive and practical ML models for risk predictions of various diseases. Another promising future research direction is applying the ML-LHS unit approach proposed in this study to other preventable diseases and building LHS units in routine health care delivery, aimed at delivering more inclusive predictive screening in underserved populations.</p>
        <p>We identify the biggest challenge of applying ML or AI in disease screening for all populations as the difficulty of deployment. ML models requiring a large number of variables may be deployed in hospitals, but they may not be usable in small clinics because the required data cannot be collected there. This study proposes a promising solution to this deployment problem: design a novel ML-enabled LHS unit and strike a balance of minimal variables and acceptable performance for the starting ML model of the LHS. Reducing the number of variables in a practical model usually reduces model performance compared with the inclusive model. Setting 80% recall, precision, and accuracy as the acceptance bar, this study of the LC model and previous study of the nasopharyngeal cancer model demonstrated that it is possible to reduce the number of variables to below 30 [<xref ref-type="bibr" rid="ref27">27</xref>].</p>
        <p>For feature engineering, a common method is to use the feature importance list from the ML model. To meet the requirements of reducing variables to a minimum while keeping performance metrics above an acceptable level in starting up an ML-LHS unit, we have proposed an alternative approach that uses a quantitative distribution of health factors generated directly from EMR data by the patient graph CDR method in previous studies [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref32">32</xref>]. This study demonstrated again the effectiveness of the new feature selection approach of using health factor distribution from the CDR method in developing inclusive and practical ML models.</p>
      </sec>
      <sec>
        <title>Limitations and Responsible AI</title>
        <p>This study, however, has limitations. The EMR data presented issues with bias and missing data [<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref39">39</xref>], which could potentially lead to biased models. For instance, smoking status and family history of LC were underreported in our data set. Significant efforts were made to understand and address these data biases, excluding variables where potential bias was identified. Despite these efforts, some biases may remain undetected and unmitigated. We also used algorithms such as XGBoost, known for effectively handling missing data. The lack of standardized structured data in EMRs made data collection labor-intensive. Reducing variables for practicality might risk overfitting in a small data set, though this issue should diminish as the ML-LHS unit continuously accumulates more data through its prediction service [<xref ref-type="bibr" rid="ref40">40</xref>].</p>
        <p>To further address these data bias issues as well as ML or AI application inequities, ML-LHS CRN will emphasize responsible AI development in future research [<xref ref-type="bibr" rid="ref41">41</xref>]. First, CRN will strive to include more clinics from communities and rural areas surrounding the lead hospital, providing access to a broader population for AI-based LC screening. Second, the ML model will be frequently updated with new data from all patients, particularly including underserved populations, to continuously make the ML data set more representative and less biased. Third, a governance committee should be established to review the development and use of the ML models to ensure high ethical standards, including protection of data safety and patient privacy, minimizing potential bias in data and algorithmic decision-making. Fourth, because mistakes or errors in AI prediction may cause harm or even deadly consequences, AI will be used only as a new information source for medical professionals or patients to make health care decisions.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>This study devised an innovative ML-LHS unit for a CRN to sustainably offer inclusive LC screening to all at-risk populations. For initializing such an ML-LHS unit serving community and rural clinics, we developed an inclusive and practical XGBoost model from hospital EMR data. Future deployment of the LC ML-LHS unit is expected to significantly improve LC-screening rates and early detection in broader populations, including those typically overlooked by existing LC-screening guidelines, such as nonsmokers and younger patients.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AI</term>
          <def>
            <p>artificial intelligence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">AUROC</term>
          <def>
            <p>area under the receiver operating characteristic curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CDR</term>
          <def>
            <p>connection delta ratio</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">CRN</term>
          <def>
            <p>clinical research network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">EMR</term>
          <def>
            <p>electronic medical record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">KNN</term>
          <def>
            <p>K-nearest neighbors</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">LC</term>
          <def>
            <p>lung cancer</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">LHS</term>
          <def>
            <p>learning health system</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">LLM</term>
          <def>
            <p>large language model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">ML</term>
          <def>
            <p>machine learning</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">ML-LHS</term>
          <def>
            <p>ML-enabled LHS</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">NAM</term>
          <def>
            <p>US National Academy of Medicine</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">PDJ</term>
          <def>
            <p>patient diagnosis journey</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">RF</term>
          <def>
            <p>random forest</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb15">ROC</term>
          <def>
            <p>receiver operating characteristic</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb16">RWD</term>
          <def>
            <p>real-world data</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb17">SVM</term>
          <def>
            <p>support vector machines</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The authors would like to thank Mr Xiaowang Chen from the Department of Medical Information at Guilin Medical University Affiliated Hospital for his support with the EMR data server and privacy training. This work was supported by funding from the Guilin Municipal Science and Technology Bureau, China (grant 20190219-2), and the Sichuan Science and Technology Support Program, China (grant 2020YFQ0019).</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The patient data sets used in the study are not available due to patient data privacy protection. Other data without privacy concerns are available from the corresponding authors upon reasonable request.</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sung</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Ferlay</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Siegel</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Laversanne</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Soerjomataram</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Jemal</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bray</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Global Cancer Statistics 2020: GLOBOCAN estimates of incidence and mortality worldwide for 36 cancers in 185 countries</article-title>
          <source>CA Cancer J Clin</source>
          <year>2021</year>
          <month>05</month>
          <volume>71</volume>
          <issue>3</issue>
          <fpage>209</fpage>
          <lpage>249</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://onlinelibrary.wiley.com/doi/10.3322/caac.21660"/>
          </comment>
          <pub-id pub-id-type="doi">10.3322/caac.21660</pub-id>
          <pub-id pub-id-type="medline">33538338</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pinsky</surname>
              <given-names>PF</given-names>
            </name>
          </person-group>
          <article-title>Lung cancer screening with low-dose CT: a world-wide view</article-title>
          <source>Transl Lung Cancer Res</source>
          <year>2018</year>
          <month>06</month>
          <volume>7</volume>
          <issue>3</issue>
          <fpage>234</fpage>
          <lpage>242</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/30050762"/>
          </comment>
          <pub-id pub-id-type="doi">10.21037/tlcr.2018.05.12</pub-id>
          <pub-id pub-id-type="medline">30050762</pub-id>
          <pub-id pub-id-type="pii">tlcr-07-03-234</pub-id>
          <pub-id pub-id-type="pmcid">PMC6037972</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>US Preventive Services Task Force</collab>
            <name name-style="western">
              <surname>Krist</surname>
              <given-names>AH</given-names>
            </name>
            <name name-style="western">
              <surname>Davidson</surname>
              <given-names>KW</given-names>
            </name>
            <name name-style="western">
              <surname>Mangione</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Barry</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Cabana</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Caughey</surname>
              <given-names>AB</given-names>
            </name>
            <name name-style="western">
              <surname>Davis</surname>
              <given-names>EM</given-names>
            </name>
            <name name-style="western">
              <surname>Donahue</surname>
              <given-names>KE</given-names>
            </name>
            <name name-style="western">
              <surname>Doubeni</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Kubik</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Landefeld</surname>
              <given-names>CS</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Ogedegbe</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Owens</surname>
              <given-names>DK</given-names>
            </name>
            <name name-style="western">
              <surname>Pbert</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Silverstein</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Stevermer</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tseng</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>JB</given-names>
            </name>
          </person-group>
          <article-title>Screening for lung cancer: US Preventive Services Task Force Recommendation Statement</article-title>
          <source>JAMA</source>
          <year>2021</year>
          <month>03</month>
          <day>09</day>
          <volume>325</volume>
          <issue>10</issue>
          <fpage>962</fpage>
          <lpage>970</lpage>
          <pub-id pub-id-type="doi">10.1001/jama.2021.1117</pub-id>
          <pub-id pub-id-type="medline">33687470</pub-id>
          <pub-id pub-id-type="pii">2777244</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>National Lung Screening Trial Research Team</collab>
            <name name-style="western">
              <surname>Aberle</surname>
              <given-names>DR</given-names>
            </name>
            <name name-style="western">
              <surname>Adams</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Berg</surname>
              <given-names>CD</given-names>
            </name>
            <name name-style="western">
              <surname>Black</surname>
              <given-names>WC</given-names>
            </name>
            <name name-style="western">
              <surname>Clapp</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Fagerstrom</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Gareen</surname>
              <given-names>IF</given-names>
            </name>
            <name name-style="western">
              <surname>Gatsonis</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Marcus</surname>
              <given-names>PM</given-names>
            </name>
            <name name-style="western">
              <surname>Sicks</surname>
              <given-names>JD</given-names>
            </name>
          </person-group>
          <article-title>Reduced lung-cancer mortality with low-dose computed tomographic screening</article-title>
          <source>N Engl J Med</source>
          <year>2011</year>
          <month>08</month>
          <day>04</day>
          <volume>365</volume>
          <issue>5</issue>
          <fpage>395</fpage>
          <lpage>409</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/21714641"/>
          </comment>
          <pub-id pub-id-type="doi">10.1056/NEJMoa1102873</pub-id>
          <pub-id pub-id-type="medline">21714641</pub-id>
          <pub-id pub-id-type="pmcid">PMC4356534</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dubin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Griffin</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Lung cancer in non-smokers</article-title>
          <source>Mo Med</source>
          <year>2020</year>
          <volume>117</volume>
          <issue>4</issue>
          <fpage>375</fpage>
          <lpage>379</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32848276"/>
          </comment>
          <pub-id pub-id-type="medline">32848276</pub-id>
          <pub-id pub-id-type="pii">ms117_p0375</pub-id>
          <pub-id pub-id-type="pmcid">PMC7431055</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Thomas</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Jakopovic</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Giaccone</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Trends and characteristics of young non-small cell lung cancer patients in the United States</article-title>
          <source>Front Oncol</source>
          <year>2015</year>
          <volume>5</volume>
          <fpage>113</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/26075181"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fonc.2015.00113</pub-id>
          <pub-id pub-id-type="medline">26075181</pub-id>
          <pub-id pub-id-type="pmcid">PMC4443720</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tammemägi</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Katki</surname>
              <given-names>HA</given-names>
            </name>
            <name name-style="western">
              <surname>Hocking</surname>
              <given-names>WG</given-names>
            </name>
            <name name-style="western">
              <surname>Church</surname>
              <given-names>TR</given-names>
            </name>
            <name name-style="western">
              <surname>Caporaso</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Kvale</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Chaturvedi</surname>
              <given-names>AK</given-names>
            </name>
            <name name-style="western">
              <surname>Silvestri</surname>
              <given-names>GA</given-names>
            </name>
            <name name-style="western">
              <surname>Riley</surname>
              <given-names>TL</given-names>
            </name>
            <name name-style="western">
              <surname>Commins</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Berg</surname>
              <given-names>CD</given-names>
            </name>
          </person-group>
          <article-title>Selection criteria for lung-cancer screening</article-title>
          <source>N Engl J Med</source>
          <year>2013</year>
          <month>02</month>
          <day>21</day>
          <volume>368</volume>
          <issue>8</issue>
          <fpage>728</fpage>
          <lpage>736</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/23425165"/>
          </comment>
          <pub-id pub-id-type="doi">10.1056/NEJMoa1211776</pub-id>
          <pub-id pub-id-type="medline">23425165</pub-id>
          <pub-id pub-id-type="pmcid">PMC3929969</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tammemägi</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Church</surname>
              <given-names>TR</given-names>
            </name>
            <name name-style="western">
              <surname>Hocking</surname>
              <given-names>WG</given-names>
            </name>
            <name name-style="western">
              <surname>Silvestri</surname>
              <given-names>GA</given-names>
            </name>
            <name name-style="western">
              <surname>Kvale</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Riley</surname>
              <given-names>TL</given-names>
            </name>
            <name name-style="western">
              <surname>Commins</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Berg</surname>
              <given-names>CD</given-names>
            </name>
          </person-group>
          <article-title>Evaluation of the lung cancer risks at which to screen ever- and never-smokers: screening rules applied to the PLCO and NLST cohorts</article-title>
          <source>PLoS Med</source>
          <year>2014</year>
          <month>12</month>
          <volume>11</volume>
          <issue>12</issue>
          <fpage>e1001764</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pmed.1001764"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pmed.1001764</pub-id>
          <pub-id pub-id-type="medline">25460915</pub-id>
          <pub-id pub-id-type="pii">PMEDICINE-D-14-01117</pub-id>
          <pub-id pub-id-type="pmcid">PMC4251899</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yong</surname>
              <given-names>PC</given-names>
            </name>
            <name name-style="western">
              <surname>Sigel</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Rehmani</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wisnivesky</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kale</surname>
              <given-names>MS</given-names>
            </name>
          </person-group>
          <article-title>Lung cancer screening uptake in the United States</article-title>
          <source>Chest</source>
          <year>2020</year>
          <month>01</month>
          <volume>157</volume>
          <issue>1</issue>
          <fpage>236</fpage>
          <lpage>238</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/31916962"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.chest.2019.08.2176</pub-id>
          <pub-id pub-id-type="medline">31916962</pub-id>
          <pub-id pub-id-type="pii">S0012-3692(19)33728-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC7609956</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Bai</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Powell</surname>
              <given-names>CA</given-names>
            </name>
          </person-group>
          <article-title>Epidemiology of lung cancer and lung cancer screening programs in China and the United States</article-title>
          <source>Cancer Lett</source>
          <year>2020</year>
          <volume>468</volume>
          <fpage>82</fpage>
          <lpage>87</lpage>
          <pub-id pub-id-type="doi">10.1016/j.canlet.2019.10.009</pub-id>
          <pub-id pub-id-type="medline">31600530</pub-id>
          <pub-id pub-id-type="pii">S0304-3835(19)30500-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Xue</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Challenges and research opportunities for lung cancer screening in China</article-title>
          <source>Cancer Commun (Lond)</source>
          <year>2018</year>
          <month>06</month>
          <day>07</day>
          <volume>38</volume>
          <issue>1</issue>
          <fpage>34</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://cancercommun.biomedcentral.com/articles/10.1186/s40880-018-0305-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s40880-018-0305-0</pub-id>
          <pub-id pub-id-type="medline">29880036</pub-id>
          <pub-id pub-id-type="pii">10.1186/s40880-018-0305-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC5992836</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tammemägi</surname>
              <given-names>MC</given-names>
            </name>
          </person-group>
          <article-title>Selecting lung cancer screenees using risk prediction models—where do we go from here</article-title>
          <source>Transl Lung Cancer Res</source>
          <year>2018</year>
          <month>06</month>
          <volume>7</volume>
          <issue>3</issue>
          <fpage>243</fpage>
          <lpage>253</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/30050763"/>
          </comment>
          <pub-id pub-id-type="doi">10.21037/tlcr.2018.06.03</pub-id>
          <pub-id pub-id-type="medline">30050763</pub-id>
          <pub-id pub-id-type="pii">tlcr-07-03-243</pub-id>
          <pub-id pub-id-type="pmcid">PMC6037970</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tammemagi</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidt</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Martel</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>McWilliams</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Goffin</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Johnston</surname>
              <given-names>MR</given-names>
            </name>
            <name name-style="western">
              <surname>Nicholas</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Tremblay</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bhatia</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Soghrati</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Yasufuku</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Hwang</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Laberge</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Gingras</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Pasian</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Couture</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Mayo</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Nasute Fauerbach</surname>
              <given-names>PV</given-names>
            </name>
            <name name-style="western">
              <surname>Atkar-Khattra</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Peacock</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Cressman</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ionescu</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>English</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Finley</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Yee</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Puksa</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Tsai</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Haider</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Boylan</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Cutz</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Manos</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Goss</surname>
              <given-names>GD</given-names>
            </name>
            <name name-style="western">
              <surname>Seely</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Amjadi</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Sekhon</surname>
              <given-names>HS</given-names>
            </name>
            <name name-style="western">
              <surname>Burrowes</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>MacEachern</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Urbanski</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sin</surname>
              <given-names>DD</given-names>
            </name>
            <name name-style="western">
              <surname>Tan</surname>
              <given-names>WC</given-names>
            </name>
            <name name-style="western">
              <surname>Leighl</surname>
              <given-names>NB</given-names>
            </name>
            <name name-style="western">
              <surname>Shepherd</surname>
              <given-names>FA</given-names>
            </name>
            <name name-style="western">
              <surname>Evans</surname>
              <given-names>WK</given-names>
            </name>
            <name name-style="western">
              <surname>Tsao</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lam</surname>
              <given-names>S</given-names>
            </name>
            <collab>PanCan Study Team</collab>
          </person-group>
          <article-title>Participant selection for lung cancer screening by risk modelling (the Pan-Canadian Early Detection of Lung Cancer [PanCan] study): a single-arm, prospective study</article-title>
          <source>Lancet Oncol</source>
          <year>2017</year>
          <volume>18</volume>
          <issue>11</issue>
          <fpage>1523</fpage>
          <lpage>1531</lpage>
          <pub-id pub-id-type="doi">10.1016/S1470-2045(17)30597-1</pub-id>
          <pub-id pub-id-type="medline">29055736</pub-id>
          <pub-id pub-id-type="pii">S1470-2045(17)30597-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Toumazis</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Bastani</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Plevritis</surname>
              <given-names>SK</given-names>
            </name>
          </person-group>
          <article-title>Risk-based lung cancer screening: a systematic review</article-title>
          <source>Lung Cancer</source>
          <year>2020</year>
          <volume>147</volume>
          <fpage>154</fpage>
          <lpage>186</lpage>
          <pub-id pub-id-type="doi">10.1016/j.lungcan.2020.07.007</pub-id>
          <pub-id pub-id-type="medline">32721652</pub-id>
          <pub-id pub-id-type="pii">S0169-5002(20)30518-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jonas</surname>
              <given-names>DE</given-names>
            </name>
            <name name-style="western">
              <surname>Reuland</surname>
              <given-names>DS</given-names>
            </name>
            <name name-style="western">
              <surname>Reddy</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Nagle</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Clark</surname>
              <given-names>SD</given-names>
            </name>
            <name name-style="western">
              <surname>Weber</surname>
              <given-names>RP</given-names>
            </name>
            <name name-style="western">
              <surname>Enyioha</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Malo</surname>
              <given-names>TL</given-names>
            </name>
            <name name-style="western">
              <surname>Brenner</surname>
              <given-names>AT</given-names>
            </name>
            <name name-style="western">
              <surname>Armstrong</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Coker-Schwimmer</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Middleton</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Voisin</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Harris</surname>
              <given-names>RP</given-names>
            </name>
          </person-group>
          <article-title>Screening for lung cancer with low-dose computed tomography: updated evidence report and systematic review for the US Preventive Services Task Force</article-title>
          <source>JAMA</source>
          <year>2021</year>
          <volume>325</volume>
          <issue>10</issue>
          <fpage>971</fpage>
          <lpage>987</lpage>
          <pub-id pub-id-type="doi">10.1001/jama.2021.0377</pub-id>
          <pub-id pub-id-type="medline">33687468</pub-id>
          <pub-id pub-id-type="pii">2777242</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pinsky</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Electronic health records and machine learning for early detection of lung cancer and other conditions: thinking about the path ahead</article-title>
          <source>Am J Respir Crit Care Med</source>
          <year>2021</year>
          <volume>204</volume>
          <issue>4</issue>
          <fpage>389</fpage>
          <lpage>390</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34097833"/>
          </comment>
          <pub-id pub-id-type="doi">10.1164/rccm.202104-1009ED</pub-id>
          <pub-id pub-id-type="medline">34097833</pub-id>
          <pub-id pub-id-type="pmcid">PMC8480236</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gould</surname>
              <given-names>MK</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>BZ</given-names>
            </name>
            <name name-style="western">
              <surname>Tammemagi</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Kinar</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Shiff</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Machine learning for early lung cancer identification using routine clinical and laboratory data</article-title>
          <source>Am J Respir Crit Care Med</source>
          <year>2021</year>
          <volume>204</volume>
          <issue>4</issue>
          <fpage>445</fpage>
          <lpage>453</lpage>
          <pub-id pub-id-type="doi">10.1164/rccm.202007-2791OC</pub-id>
          <pub-id pub-id-type="medline">33823116</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hao</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Liao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Xia</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Weng</surname>
              <given-names>CH</given-names>
            </name>
            <name name-style="western">
              <surname>Duong</surname>
              <given-names>SQ</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Alfreds</surname>
              <given-names>ST</given-names>
            </name>
            <name name-style="western">
              <surname>Stearns</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Kanov</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Sylvester</surname>
              <given-names>KG</given-names>
            </name>
            <name name-style="western">
              <surname>Widen</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>McElhinney</surname>
              <given-names>DB</given-names>
            </name>
            <name name-style="western">
              <surname>Ling</surname>
              <given-names>XB</given-names>
            </name>
          </person-group>
          <article-title>Prediction of the 1-year risk of incident lung cancer: prospective study using electronic health records from the state of Maine</article-title>
          <source>J Med Internet Res</source>
          <year>2019</year>
          <volume>21</volume>
          <issue>5</issue>
          <fpage>e13260</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2019/5/e13260/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/13260</pub-id>
          <pub-id pub-id-type="medline">31099339</pub-id>
          <pub-id pub-id-type="pii">v21i5e13260</pub-id>
          <pub-id pub-id-type="pmcid">PMC6542253</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yeh</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Bai</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>YJ</given-names>
            </name>
          </person-group>
          <article-title>Artificial intelligence–based prediction of lung cancer risk using nonimaging electronic medical records: deep learning approach</article-title>
          <source>J Med Internet Res</source>
          <year>2021</year>
          <volume>23</volume>
          <issue>8</issue>
          <fpage>e26256</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2021/8/e26256/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/26256</pub-id>
          <pub-id pub-id-type="medline">34342588</pub-id>
          <pub-id pub-id-type="pii">v23i8e26256</pub-id>
          <pub-id pub-id-type="pmcid">PMC8371476</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <collab>Institute of Medicine</collab>
          </person-group>
          <source>Best Care at Lower Cost: The Path to Continuously Learning Health Care in America</source>
          <year>2013</year>
          <publisher-loc>Washington, DC</publisher-loc>
          <publisher-name>National Academies Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <collab>Institute of Medicine</collab>
          </person-group>
          <source>Digital Infrastructure for the Learning Health System: The Foundation for Continuous Improvement in Health and Health Care: Workshop Series Summary</source>
          <year>2011</year>
          <publisher-loc>Washington, DC</publisher-loc>
          <publisher-name>National Academies Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <collab>Institute of Medicine</collab>
          </person-group>
          <source>The Learning Healthcare System: Workshop Summary</source>
          <year>2007</year>
          <publisher-loc>Washington, DC</publisher-loc>
          <publisher-name>National Academies Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Simon</surname>
              <given-names>GE</given-names>
            </name>
            <name name-style="western">
              <surname>Platt</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Hernandez</surname>
              <given-names>AF</given-names>
            </name>
          </person-group>
          <article-title>Evidence from pragmatic trials during routine care—slouching toward a learning health system</article-title>
          <source>N Engl J Med</source>
          <year>2020</year>
          <volume>382</volume>
          <issue>16</issue>
          <fpage>1488</fpage>
          <lpage>1491</lpage>
          <pub-id pub-id-type="doi">10.1056/NEJMp1915448</pub-id>
          <pub-id pub-id-type="medline">32294344</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <collab>Institute of Medicine</collab>
          </person-group>
          <source>Large Simple Trials and Knowledge Generation in a Learning Health System: Workshop Summary</source>
          <year>2013</year>
          <publisher-loc>Washington, DC</publisher-loc>
          <publisher-name>National Academies Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>DO</given-names>
            </name>
          </person-group>
          <article-title>Simulation of a machine learning enabled learning health system for risk prediction using synthetic patient data</article-title>
          <source>Sci Rep</source>
          <year>2022</year>
          <volume>12</volume>
          <issue>1</issue>
          <fpage>17917</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-022-23011-4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-022-23011-4</pub-id>
          <pub-id pub-id-type="medline">36289292</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-022-23011-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC9606301</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>A novel graph methodology for analyzing disease risk factor distribution using synthetic patient data</article-title>
          <source>Healthc Analytics</source>
          <year>2022</year>
          <volume>2</volume>
          <fpage>100084</fpage>
          <pub-id pub-id-type="doi">10.1016/j.health.2022.100084</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Qin</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Wen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Building practical risk prediction models for nasopharyngeal carcinoma screening with patient graph analysis and machine learning</article-title>
          <source>Cancer Epidemiol Biomarkers Prev</source>
          <year>2023</year>
          <volume>32</volume>
          <issue>2</issue>
          <fpage>274</fpage>
          <lpage>280</lpage>
          <pub-id pub-id-type="doi">10.1158/1055-9965.EPI-22-0792</pub-id>
          <pub-id pub-id-type="medline">36480263</pub-id>
          <pub-id pub-id-type="pii">711506</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Guestrin</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>XGBoost: a scalable tree boosting system</article-title>
          <year>2016</year>
          <conf-name>Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</conf-name>
          <conf-date>2016</conf-date>
          <conf-loc>San Francisco, CA</conf-loc>
          <fpage>785</fpage>
          <lpage>794</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pedregosa</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Varoquaux</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gramfort</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Michel</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Thirion</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Grisel</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Blondel</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Prettenhofer</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Weiss</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Dubourg</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Vanderplas</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Passos</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cournapeau</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Brucher</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Perrot</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Duchesnay</surname>
              <given-names>É</given-names>
            </name>
          </person-group>
          <article-title>Scikit-learn: machine learning in Python</article-title>
          <source>J Mach Learn Res</source>
          <year>2011</year>
          <volume>12</volume>
          <issue>85</issue>
          <fpage>2825</fpage>
          <lpage>2830</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Granger</surname>
              <given-names>BE</given-names>
            </name>
            <name name-style="western">
              <surname>Perez</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Jupyter: thinking and storytelling with code and data</article-title>
          <source>Comput Sci Eng</source>
          <year>2021</year>
          <volume>23</volume>
          <issue>2</issue>
          <fpage>7</fpage>
          <lpage>14</lpage>
          <pub-id pub-id-type="doi">10.1109/mcse.2021.3059263</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>PC</given-names>
            </name>
            <name name-style="western">
              <surname>Krause</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>How to read articles that use machine learning: users' guides to the medical literature</article-title>
          <source>JAMA</source>
          <year>2019</year>
          <volume>322</volume>
          <issue>18</issue>
          <fpage>1806</fpage>
          <lpage>1816</lpage>
          <pub-id pub-id-type="doi">10.1001/jama.2019.16489</pub-id>
          <pub-id pub-id-type="medline">31714992</pub-id>
          <pub-id pub-id-type="pii">2754798</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Wen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>The generation of a lung cancer health factor distribution using patient graphs constructed from electronic medical records: retrospective study</article-title>
          <source>J Med Internet Res</source>
          <year>2022</year>
          <volume>24</volume>
          <issue>11</issue>
          <fpage>e40361</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2022/11/e40361/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/40361</pub-id>
          <pub-id pub-id-type="medline">36427233</pub-id>
          <pub-id pub-id-type="pii">v24i11e40361</pub-id>
          <pub-id pub-id-type="pmcid">PMC9736747</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sands</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tammemägi</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Couraud</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Baldwin</surname>
              <given-names>DR</given-names>
            </name>
            <name name-style="western">
              <surname>Borondy-Kitts</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Yankelevitz</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Grannis</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Kauczor</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>von Stackelberg</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Sequist</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Pastorino</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>McKee</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Lung screening benefits and challenges: a review of the data and outline for implementation</article-title>
          <source>J Thorac Oncol</source>
          <year>2021</year>
          <volume>16</volume>
          <issue>1</issue>
          <fpage>37</fpage>
          <lpage>53</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1556-0864(20)30993-X"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jtho.2020.10.127</pub-id>
          <pub-id pub-id-type="medline">33188913</pub-id>
          <pub-id pub-id-type="pii">S1556-0864(20)30993-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tanner</surname>
              <given-names>NT</given-names>
            </name>
            <name name-style="western">
              <surname>Brasher</surname>
              <given-names>PB</given-names>
            </name>
            <name name-style="western">
              <surname>Wojciechowski</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Ward</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Slatore</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Gebregziabher</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Silvestri</surname>
              <given-names>GA</given-names>
            </name>
          </person-group>
          <article-title>Screening adherence in the Veterans Administration Lung Cancer Screening Demonstration Project</article-title>
          <source>Chest</source>
          <year>2020</year>
          <volume>158</volume>
          <issue>4</issue>
          <fpage>1742</fpage>
          <lpage>1752</lpage>
          <pub-id pub-id-type="doi">10.1016/j.chest.2020.04.063</pub-id>
          <pub-id pub-id-type="medline">32439505</pub-id>
          <pub-id pub-id-type="pii">S0012-3692(20)31414-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Burnett-Hartman</surname>
              <given-names>AN</given-names>
            </name>
            <name name-style="western">
              <surname>Wiener</surname>
              <given-names>RS</given-names>
            </name>
          </person-group>
          <article-title>Lessons learned to promote lung cancer screening and preempt worsening lung cancer disparities</article-title>
          <source>Am J Respir Crit Care Med</source>
          <year>2020</year>
          <volume>201</volume>
          <issue>8</issue>
          <fpage>892</fpage>
          <lpage>893</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/31905007"/>
          </comment>
          <pub-id pub-id-type="doi">10.1164/rccm.201912-2398ED</pub-id>
          <pub-id pub-id-type="medline">31905007</pub-id>
          <pub-id pub-id-type="pmcid">PMC7159416</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kanjee</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Crowe</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Rodman</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Accuracy of a generative artificial intelligence model in a complex diagnostic challenge</article-title>
          <source>JAMA</source>
          <year>2023</year>
          <volume>330</volume>
          <issue>1</issue>
          <fpage>78</fpage>
          <lpage>80</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37318797"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jama.2023.8288</pub-id>
          <pub-id pub-id-type="medline">37318797</pub-id>
          <pub-id pub-id-type="pii">2806457</pub-id>
          <pub-id pub-id-type="pmcid">PMC10273128</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>DO</given-names>
            </name>
            <name name-style="western">
              <surname>Tian</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Benchmarking the symptom-checking capabilities of ChatGPT for a broad range of diseases</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2023</year>
          <month>12</month>
          <day>18</day>
          <fpage>ocad245</fpage>
          <pub-id pub-id-type="doi">10.1093/jamia/ocad245</pub-id>
          <pub-id pub-id-type="medline">38109889</pub-id>
          <pub-id pub-id-type="pii">7477862</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kukhareva</surname>
              <given-names>PV</given-names>
            </name>
            <name name-style="western">
              <surname>Caverly</surname>
              <given-names>TJ</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Katki</surname>
              <given-names>HA</given-names>
            </name>
            <name name-style="western">
              <surname>Cheung</surname>
              <given-names>LC</given-names>
            </name>
            <name name-style="western">
              <surname>Reese</surname>
              <given-names>TJ</given-names>
            </name>
            <name name-style="western">
              <surname>Del Fiol</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Hess</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Wetter</surname>
              <given-names>DW</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Taft</surname>
              <given-names>TY</given-names>
            </name>
            <name name-style="western">
              <surname>Flynn</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Kawamoto</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Inaccuracies in electronic health records smoking data and a potential approach to address resulting underestimation in determining lung cancer screening eligibility</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2022</year>
          <volume>29</volume>
          <issue>5</issue>
          <fpage>779</fpage>
          <lpage>788</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35167675"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocac020</pub-id>
          <pub-id pub-id-type="medline">35167675</pub-id>
          <pub-id pub-id-type="pii">6529026</pub-id>
          <pub-id pub-id-type="pmcid">PMC9006678</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sauer</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hyland</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Girbes</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Elbers</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Celi</surname>
              <given-names>LA</given-names>
            </name>
          </person-group>
          <article-title>Leveraging electronic health records for data science: common pitfalls and how to avoid them</article-title>
          <source>Lancet Digit Health</source>
          <year>2022</year>
          <volume>4</volume>
          <issue>12</issue>
          <fpage>e893</fpage>
          <lpage>e898</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2589-7500(22)00154-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S2589-7500(22)00154-6</pub-id>
          <pub-id pub-id-type="medline">36154811</pub-id>
          <pub-id pub-id-type="pii">S2589-7500(22)00154-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Abraham</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Blanco</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Christian</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Kass</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Larson</surname>
              <given-names>EB</given-names>
            </name>
            <name name-style="western">
              <surname>Mazumdar</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Morain</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Newton</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Ommaya</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Patrick-Lake</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Platt</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Steiner</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zirkle</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lopez</surname>
              <given-names>MH</given-names>
            </name>
          </person-group>
          <article-title>Generating knowledge from best care: advancing the continuously learning health system</article-title>
          <source>NAM Perspectives</source>
          <year>2016</year>
          <publisher-loc>Washington, DC</publisher-loc>
          <publisher-name>National Academy of Medicine</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Goldberg</surname>
              <given-names>CB</given-names>
            </name>
            <name name-style="western">
              <surname>Adams</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Blumenthal</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Brennan</surname>
              <given-names>PF</given-names>
            </name>
            <name name-style="western">
              <surname>Brown</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Butte</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Cheatham</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>deBronkart</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Dixon</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Drazen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Evans</surname>
              <given-names>BJ</given-names>
            </name>
            <name name-style="western">
              <surname>Hoffman</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Holmes</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Manrai</surname>
              <given-names>AK</given-names>
            </name>
            <name name-style="western">
              <surname>Omenn</surname>
              <given-names>GS</given-names>
            </name>
            <name name-style="western">
              <surname>Perlin</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Ramoni</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Sapiro</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Sarkar</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Sood</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Vayena</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Kohane</surname>
              <given-names>IS</given-names>
            </name>
            <collab>RAISE Consortium</collab>
          </person-group>
          <article-title>To do no harm—and the most good—with AI in health care</article-title>
          <source>Nat Med</source>
          <year>2024</year>
          <volume>30</volume>
          <issue>3</issue>
          <fpage>623</fpage>
          <lpage>627</lpage>
          <pub-id pub-id-type="doi">10.1038/s41591-024-02853-7</pub-id>
          <pub-id pub-id-type="medline">38388841</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41591-024-02853-7</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
