<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR AI</journal-id><journal-id journal-id-type="publisher-id">ai</journal-id><journal-id journal-id-type="index">41</journal-id><journal-title>JMIR AI</journal-title><abbrev-journal-title>JMIR AI</abbrev-journal-title><issn pub-type="epub">2817-1705</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v4i1e68260</article-id><article-id pub-id-type="doi">10.2196/68260</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Use of Automated Machine Learning to Detect Undiagnosed Diabetes in US Adults: Development and Validation Study</article-title></title-group><contrib-group><contrib contrib-type="author"><name name-style="western"><surname>Liu</surname><given-names>Jianxiu</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Ssewamala</surname><given-names>Fred</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff3">3</xref><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>An</surname><given-names>Ruopeng</given-names></name><degrees>MPP, PhD</degrees><xref ref-type="aff" rid="aff3">3</xref><xref ref-type="aff" rid="aff5">5</xref></contrib><contrib contrib-type="author" corresp="yes"><name 
name-style="western"><surname>Ji</surname><given-names>Mengmeng</given-names></name><degrees>MBBS, PhD</degrees><xref ref-type="aff" rid="aff6">6</xref></contrib></contrib-group><aff id="aff1"><institution>Division of Sports Science and Physical Education, Tsinghua University</institution><addr-line>Beijing</addr-line><country>China</country></aff><aff id="aff2"><institution>IDG/McGovern Institute for Brain Research, Tsinghua University</institution><addr-line>Beijing</addr-line><country>China</country></aff><aff id="aff3"><institution>Silver School of Social Work, New York University</institution><addr-line>New York</addr-line><addr-line>NY</addr-line><country>United States</country></aff><aff id="aff4"><institution>McSilver Institute for Poverty Policy and Research, New York University</institution><addr-line>New York</addr-line><addr-line>NY</addr-line><country>United States</country></aff><aff id="aff5"><institution>Constance and Martin Silver Center on Data Science and Social Equity, New York University</institution><addr-line>New York</addr-line><addr-line>NY</addr-line><country>United States</country></aff><aff id="aff6"><institution>Division of Public Health Sciences, Department of Surgery, Washington University in St. Louis</institution><addr-line>600 S Taylor Ave</addr-line><addr-line>St. 
Louis</addr-line><addr-line>MO</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Emam</surname><given-names>Khaled El</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Maheshwari</surname><given-names>Harsh</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Thayil</surname><given-names>Jerry John</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Mengmeng Ji, MBBS, PhD, Division of Public Health Sciences, Department of Surgery, Washington University in St. Louis, 600 S Taylor Ave, St. Louis, MO, 63110, United States, 1 2179799336; <email>j.mengmeng@wustl.edu</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>8</day><month>10</month><year>2025</year></pub-date><volume>4</volume><elocation-id>e68260</elocation-id><history><date date-type="received"><day>31</day><month>10</month><year>2024</year></date><date date-type="rev-recd"><day>23</day><month>05</month><year>2025</year></date><date date-type="accepted"><day>22</day><month>07</month><year>2025</year></date></history><copyright-statement>&#x00A9; Jianxiu Liu, Fred Ssewamala, Ruopeng An, Mengmeng Ji. Originally published in JMIR AI (<ext-link ext-link-type="uri" xlink:href="https://ai.jmir.org">https://ai.jmir.org</ext-link>), 8.10.2025. 
</copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR AI, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://www.ai.jmir.org/">https://www.ai.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://ai.jmir.org/2025/1/e68260"/><abstract><sec><title>Background</title><p>Early diagnosis of diabetes is essential for early interventions to slow the progression of dysglycemia and its comorbidities. However, among individuals with diabetes, about 23% were unaware of their condition.</p></sec><sec><title>Objective</title><p>This study aims to investigate the potential use of automated machine learning (AutoML) models and self-reported data in detecting undiagnosed diabetes among US adults.</p></sec><sec sec-type="methods"><title>Methods</title><p>Individual-level data, including biochemical tests for diabetes, demographic characteristics, family history of diabetes, anthropometric measures, dietary intakes, health behaviors, and chronic conditions, were retrieved from the National Health and Nutrition Examination Survey, 1999&#x2010;2020. Undiagnosed diabetes was defined as having no prior self-reported diagnosis but meeting diagnostic criteria for elevated hemoglobin A<sub>1c</sub>, fasting plasma glucose, or 2-hour plasma glucose. 
The H2O AutoML framework, which allows for automated hyperparameter tuning, model selection, and ensemble learning, was used to automate the machine learning workflow. For comparative analysis, 4 traditional machine learning models&#x2014;logistic regression, support vector machines, random forest, and extreme gradient boosting&#x2014;were implemented. Model performance was evaluated using the area under the receiver operating characteristic curve.</p></sec><sec sec-type="results"><title>Results</title><p>The study included 11,815 participants aged 20 years and older, comprising 2256 patients with undiagnosed diabetes and 9559 without diabetes. The average age was 59.76 (SD 15.0) years for participants with undiagnosed diabetes and 46.78 (SD 17.2) years for those without diabetes. The AutoML model demonstrated superior performance compared with the 4 traditional machine learning models. The trained AutoML model achieved an area under the receiver operating characteristic curve of 0.909 (95% CI 0.897-0.921) in the test set. The model demonstrated a sensitivity of 70.26%, specificity of 90.46%, positive predictive value of 64.10%, and negative predictive value of 92.61% for identifying undiagnosed diabetes from nondiabetes.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>To our knowledge, this study is the first to utilize the AutoML model for detecting undiagnosed diabetes in US adults. The model&#x2019;s strong performance and applicability to the broader US population make it a promising tool for large-scale diabetes screening efforts.</p></sec></abstract><kwd-group><kwd>machine learning</kwd><kwd>AutoML</kwd><kwd>self-report</kwd><kwd>screening</kwd><kwd>undiagnosed diabetes</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Diabetes mellitus is the eighth leading cause of death in the United States and contributes to substantial health care costs [<xref ref-type="bibr" rid="ref1">1</xref>]. 
In 2021, an estimated 38.4 million Americans of all ages had diabetes, representing 11.6% of the US population [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>]. Of those with diabetes, 22.8% were unaware of or did not report having diabetes [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>]. When diabetes is undiagnosed, and consequently hyperglycemia remains unmanaged, severe and irreversible microvascular and macrovascular complications can develop, including diabetic neuropathy, nephropathy, retinopathy, and cardiovascular disease [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref7">7</xref>].</p><p>Screening asymptomatic individuals for undiagnosed diabetes enables earlier diagnosis and treatment, ultimately reducing the risk of complications and premature death [<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref10">10</xref>]. The latest American Diabetes Association (ADA) and US Preventive Services Task Force guidelines recommend beginning diabetes screenings at the age of 35 years [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>]. However, diabetes screening guidelines that rely on blood testing are not widely followed. Only 50%&#x2010;60% of US adults who met the criteria for screening reported receiving glucose testing within the past 3 years [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>]. 
The testing rate was alarmingly low among high-risk groups, including those with low education, low household income, and limited health care access [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>].</p><p>Risk assessment tools for diabetes detection using easily accessible and self-reported data have been proposed, but they have shown low overall accuracy and validity in the general population [<xref ref-type="bibr" rid="ref15">15</xref>-<xref ref-type="bibr" rid="ref21">21</xref>]. In recent years, various machine learning algorithms have been used to predict diabetes and have yielded better performance than traditional statistics-based models [<xref ref-type="bibr" rid="ref22">22</xref>-<xref ref-type="bibr" rid="ref28">28</xref>]. Few studies have developed machine learning models to detect undiagnosed diabetes in the US population [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref30">30</xref>]. Although 2 studies reported a good overall accuracy of 80%, the quality of a positive prediction by models (ie, precision) was notably low, which could lead to a high number of false positives and unnecessary follow-up testing [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref30">30</xref>].</p><p>More recently, there has been growing interest within the health care community in automated machine learning (AutoML), which automates machine learning models&#x2019; selection, composition, and parameterization to optimize performance [<xref ref-type="bibr" rid="ref31">31</xref>-<xref ref-type="bibr" rid="ref33">33</xref>]. AutoML uses voting and stacking ensemble techniques to combine multiple learning models, often improving classification accuracy more effectively than a single machine learning algorithm. Its automation also reduces human error and bias by impartially exploring a wide range of machine learning models [<xref ref-type="bibr" rid="ref33">33</xref>]. 
However, despite its potential, no prior studies have investigated the feasibility and performance of AutoML in screening for undiagnosed diabetes.</p><p>This study aimed to investigate the potential use of AutoML and self-reported data in detecting undiagnosed diabetes among US adults in a nationally representative survey. The trained model could aid in detecting undiagnosed diabetes in the general US population, particularly in underserved populations with limited access to blood glucose tests. This study could also promote the adoption of AutoML in diabetes research.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Data Source</title><p>Individual-level data were retrieved from the National Health and Nutrition Examination Survey (NHANES), 1999&#x2010;2020. NHANES is a nationally representative, repeated cross-sectional study conducted by the National Center for Health Statistics. NHANES adopts a complex, multistage probability sampling design to ensure that the collected data are representative of the noninstitutionalized civilian population in the United States. NHANES includes clinical examinations, selected medical and laboratory tests, and self-reported data. NHANES interviews people in their homes and conducts health examinations in a mobile examination center, including laboratory analysis of blood, urine, and other tissue samples. The detailed study design and methodology of NHANES have been described elsewhere [<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref35">35</xref>]. 
This study followed the CREMLS (Consolidated Reporting Guidelines for Prognostic and Diagnostic Machine Learning Models) [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>].</p></sec><sec id="s2-2"><title>Biochemical Tests for Undiagnosed Diabetes</title><p>Following the ADA guidelines [<xref ref-type="bibr" rid="ref38">38</xref>], diabetes was diagnosed based on elevated levels of hemoglobin A<sub>1c</sub> (&#x2265;6.5%), fasting plasma glucose (&#x2265;126 mg/dL), or 2-hour plasma glucose (&#x2265;200 mg/dL) during a 75-g oral glucose tolerance test. In this analysis, undiagnosed diabetes was defined as having no prior self-reported diagnosis but meeting any of the diagnosis criteria for elevated hemoglobin A<sub>1c</sub>, fasting plasma glucose level, or oral glucose tolerance test level. The details of the diagnostic method used to define diabetes in this study are provided in <xref ref-type="table" rid="table1">Table 1</xref>.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Diagnostic method used to define diabetes in this study.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom"/><td align="left" valign="bottom">Diagnostic method</td></tr></thead><tbody><tr><td align="left" valign="bottom" colspan="2">Diabetic</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Diagnosed diabetes</td><td align="left" valign="top">Answer &#x201C;Yes&#x201D; to &#x201C;Other than during pregnancy, have you ever been told by a doctor or health professional that you have diabetes or sugar diabetes?&#x201D;</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Undiagnosed diabetes</td><td align="left" valign="top">Answer &#x201C;No&#x201D; to &#x201C;Other than during pregnancy, have you ever been told by a doctor 
or health professional that you have diabetes or sugar diabetes?&#x201D;<break/>AND<break/>Any of the following tests meet criteria:<list list-type="bullet"><list-item><p><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>HbA<sub>1c</sub><sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup>&#x2265;6.5% (&#x2265;48 mmol/mol).</p></list-item><list-item><p><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>FPG<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup>&#x2265;126 mg/dL (&#x2265;7.0 mmol/L).</p></list-item><list-item><p><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>2-h PG<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup>&#x2265;200 mg/dL (&#x2265;11.1 mmol/L) during OGTT<sup><xref ref-type="table-fn" rid="table1fn4">d</xref></sup>.</p></list-item></list></td></tr><tr><td align="left" valign="top" colspan="2">Nondiabetic</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Prediabetes</td><td align="left" valign="top">Does not meet criteria for diabetes diagnosis<break/>AND<break/>Any of the following tests meet criteria:<list list-type="bullet"><list-item><p><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>HbA<sub>1c</sub> 5.7%&#x2010;6.4% (39&#x2010;47 mmol/mol)</p></list-item><list-item><p><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>FPG 100&#x2010;125 mg/dL (5.6&#x2010;6.9 mmol/L)</p></list-item><list-item><p><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>2-h PG 140&#x2010;199 mg/dL (7.8&#x2010;11.0 mmol/L)</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Normoglycemia</td><td align="left" valign="top">All the following tests meet 
criteria:<list list-type="bullet"><list-item><p><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>HbA<sub>1c</sub>&#x003C;5.7% (&#x003C;39 mmol/mol)</p></list-item><list-item><p><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>FPG&#x003C;100 mg/dL (&#x003C;5.6 mmol/L)</p></list-item><list-item><p><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>2-h PG&#x003C;140 mg/dL (&#x003C;7.8 mmol/L)</p></list-item></list></td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>HbA<sub>1c</sub>: hemoglobin A<sub>1c</sub>.</p></fn><fn id="table1fn2"><p><sup>b</sup>FPG: fasting plasma glucose.</p></fn><fn id="table1fn3"><p><sup>c</sup>PG: plasma glucose.</p></fn><fn id="table1fn4"><p><sup>d</sup>OGTT: oral glucose tolerance test.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s2-3"><title>Participant Selection</title><p>The study utilized data from NHANES 1999&#x2010;2020, comprising an initial cohort of 112,502 participants. Participants with self-reported diabetes (n=8657) and those with missing data on self-reported diabetes status (n=9672) were excluded. Individuals aged &#x003C;20 years (n=43,879) and pregnant females (n=1540) were removed from the cohort. Participants with missing laboratory results for diabetes were further excluded (n=36,939). For inclusion in the nondiabetic group, participants had to meet all 3 test criteria to confirm the absence of diabetes. For the diabetes group, participants were included if at least one test met the diagnostic criteria, even if the other two test results were missing. 
In total, the study cohort included 11,815 participants and was categorized into 2 groups: 9559 without diabetes and 2256 with undiagnosed diabetes.</p></sec><sec id="s2-4"><title>Features</title><sec id="s2-4-1"><title>Demographic Characteristics</title><p>Demographic features included age at the survey, gender (male/female), race/ethnicity (non-Hispanic White, non-Hispanic Black, Mexican American, and other races), educational attainment (lower than 9th-grade education, 9th- to 11th-grade education, high school, some college or associate degree, college or higher), marital status (married, widowed, divorced, separated, never married, living with a partner), and income-to-poverty ratio (ratio of monthly family income to the poverty guidelines).</p><p>Family history of diabetes was ascertained from the Medical Conditions Questionnaire: &#x201C;Including living and deceased, were any of your biological, that is, blood relatives including grandparents, parents, brothers, sisters ever told by a health professional that they had diabetes?&#x201D;</p></sec><sec id="s2-4-2"><title>Anthropometric Measures</title><p>Participants were weighed in mobile examination centers, wearing only underclothing and an examination gown. Weight was recorded on a digital scale in kilograms. Standing height was measured using a stadiometer with a fixed vertical backboard and an adjustable headpiece. BMI was calculated as measured weight in kilograms divided by height in meters squared. Waist circumference was measured just above the iliac crest using a steel measuring tape.</p></sec><sec id="s2-4-3"><title>Diet Intake and Behaviors</title><p>In NHANES, 24-hour dietary recalls were administered to obtain detailed nutritional intake information from participants. Daily dietary intake (the average of 2 d) of energy (kcal), total fat (g), cholesterol (mg), and total sugars (g) was calculated. 
The frequency of eating out per week was obtained from the question: &#x201C;On average, how many times per week do you eat meals prepared in a restaurant?&#x201D;</p></sec><sec id="s2-4-4"><title>Health Behaviors</title><p>The NHANES physical activity questionnaire included questions about daily and leisure-time activities. The average hours spent in each activity were multiplied by the suggested metabolic equivalent (MET) scores to estimate MET hours per week [<xref ref-type="bibr" rid="ref39">39</xref>]. Indicators (yes/no) for smoking and drinking were obtained from answers to questions: &#x201C;In any one year, have you had at least 12 drinks of any type of alcoholic beverage?&#x201D; and &#x201C;Have you smoked at least 100 cigarettes in your entire life?&#x201D;</p></sec><sec id="s2-4-5"><title>Chronic Conditions</title><p>Comorbid conditions were obtained from self-reports to the Medical Conditions Questionnaire, &#x201C;Have you ever been told by a doctor that you had (medical problem)?&#x201D; which includes hypertension, rheumatoid arthritis, myocardial infarction, congestive heart failure, coronary heart disease, stroke, liver disease, weak/failing kidneys, and cancer/malignancy of any kind.</p></sec></sec><sec id="s2-5"><title>AutoML and Custom Machine Learning Models</title><p>The H2O AutoML framework was used in this study to automate the machine learning workflow. The H2O AutoML trains several models, cross-validated by default, by using the following available algorithms: extreme gradient boosting (XGBoost), gradient boosting machine, generalized linear model, distributed random forest, extremely randomized trees, and fully connected deep neural network. H2O AutoML introduces two essential advancements to optimize model performance. First, it fine-tunes base models using a fast random search approach, where hyperparameters are selected from a range of values identified as most impactful. 
Second, H2O AutoML leverages a sophisticated stacking technique to create two powerful ensemble models: &#x201C;All models ensemble,&#x201D; which combines all the base models trained, and &#x201C;Best of the Family ensemble,&#x201D; which contains the best-performing models. The stacked ensemble models are designed to leverage the diverse strengths of various algorithms, resulting in a final model that is accurate and generalizable across different datasets. H2O AutoML has built-in functionality for class balancing and handling of missing values. Detailed documentation, as well as directions for algorithms and the implementation of H2O.ai, are available online [<xref ref-type="bibr" rid="ref40">40</xref>].</p><p>We randomly split the dataset into training (70%) and test (30%) sets. AutoML trained diverse base models on the training set, ranking them by cross-validated area under the receiver operating characteristic curve (AUC). Cross-validation was performed using a 5-fold approach, with models iteratively trained on 4 subsets of the training set and validated on the remaining subset. A stacked ensemble (&#x201C;leader&#x201D;) was constructed by blending the top-performing base models via a meta learner. This stacked ensemble model was exported and applied to our independent holdout test set to generate class-probability predictions. The 95% CIs for each AUC were derived from 1000 bootstrap resamples of the test set. Confusion matrices were constructed to calculate sensitivity, specificity, positive predictive value (PPV), and negative predictive value (NPV) using the classification threshold that maximized the <italic>F</italic><sub>1</sub>-score (ie, the harmonic mean of precision and recall) on the test set. We extracted feature importance from each base learner and computed a weighted aggregate in the H2O leader model. 
For tree-based models, feature importance was determined by the frequency of each feature used for splitting and the overall reduction in squared error. For non&#x2013;tree-based models, importance was based on coefficient magnitudes.</p><p>In addition to the AutoML model, we conducted a comparative analysis using 4 traditional machine learning models&#x2014;logistic regression, support vector machines, random forest, and XGBoost, with Synthetic Minority Over-sampling Technique applied to address data imbalance during training [<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref42">42</xref>].</p><p>The clinical guidelines generally recommend confirming an elevated test with a secondary measurement for the diagnosis of diabetes [<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref43">43</xref>]. In the main analyses, diabetes was diagnosed based on a single elevated test result. In additional analyses, the diagnosis of diabetes was confirmed by at least 2 elevated tests recommended by the ADA guidelines. We also evaluated the performance of a 3-class prediction model for diabetes within the AutoML framework, using classification schemes that distinguished between normoglycemia, prediabetes, and undiagnosed diabetes.</p><p>Summary statistics for participant characteristics, stratified by diabetes status, were calculated. Categorical variables were compared using <italic>&#x03C7;</italic><sup>2</sup> tests, and continuous variables were evaluated using independent samples <italic>t</italic> tests. Missing data were imputed using mean values for continuous variables and mode for categorical variables. 
We used Stata 18 (StataCorp LLC) for data preparation and Python version 3.10.12 (Python Software Foundation) to implement the H2O AutoML (version 3.46.0.6; H2O.ai, Inc) and custom machine learning models.</p></sec><sec id="s2-6"><title>Ethical Considerations</title><p>This study used publicly available, deidentified NHANES data. In accordance with the US Department of Health and Human Services (Title 45 of the Code of Federal Regulations; &#x00A7;46.104 (d), section 4) [<xref ref-type="bibr" rid="ref44">44</xref>], analyses of publicly available, deidentified data are not considered human subjects research and therefore do not require review by the Washington University Institutional Review Board.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><p>In total, the study cohort included 11,815 participants, with 9559 participants without diabetes and 2256 with undiagnosed diabetes. The characteristics of the study cohort are summarized in <xref ref-type="table" rid="table2">Table 2</xref>. The average ages were 59.76 (SD 15.0) years for those with undiagnosed diabetes and 46.78 (SD 17.2) years for those without. The diabetes group had a higher proportion of males, lower levels of education, and a greater likelihood of having a relative with diabetes. Additionally, patients with undiagnosed diabetes had higher BMI, larger waist circumference, and a higher prevalence of chronic conditions. 
The flow diagram of participant selection is presented in <xref ref-type="fig" rid="figure1">Figure 1</xref>.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Cohort characteristics by diabetes status.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom"/><td align="left" valign="bottom">No diabetes (n=9559)</td><td align="left" valign="bottom">Undiagnosed diabetes (n=2256)</td><td align="left" valign="bottom"><italic>P</italic> value</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="4">Laboratory tests, mean (SD)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Glycohemoglobin (%)</td><td align="left" valign="top">5.41 (0.38)</td><td align="left" valign="top">6.88 (1.60)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fasting plasma glucose (mg/dL)</td><td align="left" valign="top">98.18 (9.46)</td><td align="left" valign="top">141.38 (46.79)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Oral glucose tolerance test (mg/dL)</td><td align="left" valign="top">109.53 (31.95)</td><td align="left" valign="top">229.24 (75.68)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top" colspan="4">Demographic characteristics</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Age (years), mean (SD)</td><td align="left" valign="top">46.78 (17.19)</td><td align="left" valign="top">59.76 (14.98)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top" colspan="3"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Gender, n (%)</td><td align="left" valign="top">.005</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Male</td><td align="left" valign="top">4711 (49.28)</td><td align="left" valign="top">1186 (52.57)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Female</td><td align="left" valign="top">4848 (50.72)</td><td align="left" valign="top">1070 (47.43)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="3"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Race/ethnicity, n (%)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Non-Hispanic White</td><td align="left" valign="top">4381 (45.83)</td><td align="left" valign="top">835 (37.01)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Non-Hispanic Black</td><td align="left" valign="top">1750 (18.31)</td><td align="left" valign="top">504 (22.34)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Hispanic</td><td align="left" valign="top">2459 
(25.72)</td><td align="left" valign="top">698 (30.94)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Other</td><td align="left" valign="top">969 (10.14)</td><td align="left" valign="top">219 (9.71)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="3"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Education, n (%)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>&#x2264;9 grade</td><td align="left" valign="top">881 (9.22)</td><td align="left" valign="top">434 (19.28)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>9th-11th grade</td><td align="left" valign="top">1301 (13.62)</td><td align="left" valign="top">391 (17.37)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>High school</td><td align="left" valign="top">2127 (22.27)</td><td align="left" valign="top">554 (24.61)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Some college</td><td align="left" valign="top">2801 (29.32)</td><td align="left" valign="top">543 
(24.12)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>College or above</td><td align="left" valign="top">2443 (25.57)</td><td align="left" valign="top">329 (14.62)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="3"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Marital status, n (%)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Married</td><td align="left" valign="top">5016 (52.47)</td><td align="left" valign="top">1248 (55.54)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Widowed</td><td align="left" valign="top">568 (5.94)</td><td align="left" valign="top">390 (17.36)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Divorced</td><td align="left" valign="top">950 (9.94)</td><td align="left" valign="top">251 (11.17)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Separated</td><td align="left" valign="top">330 (3.45)</td><td align="left" valign="top">72 (3.20)</td><td align="left" valign="top"/></tr><tr><td 
align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Never married</td><td align="left" valign="top">1843 (19.28)</td><td align="left" valign="top">192 (8.54)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Living with partner</td><td align="left" valign="top">852 (8.91)</td><td align="left" valign="top">94 (4.18)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Income-to-poverty ratio, mean (SD)</td><td align="left" valign="top">2.60 (1.64)</td><td align="left" valign="top">2.29 (1.53)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Family history of diabetes, n (%)</td><td align="left" valign="top">3394 (36.16)</td><td align="left" valign="top">1072 (48.79)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top" colspan="4">Anthropometric measures, mean (SD)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Height (cm)</td><td align="left" valign="top">167.89 (10.00)</td><td align="left" valign="top">165.84 (10.24)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Weight (kg)</td><td align="left" valign="top">80.12 (20.69)</td><td align="left" valign="top">88.19 (23.32)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" 
valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>BMI (kg/m<sup>2</sup>)</td><td align="left" valign="top">28.33 (6.53)</td><td align="left" valign="top">31.94 (7.43)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Waist circumference (cm)</td><td align="left" valign="top">96.92 (15.32)</td><td align="left" valign="top">107.5 (15.69)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top" colspan="4">Diet intake and eating behavior, mean (SD)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Daily total energy (kcal)</td><td align="left" valign="top">2090.13 (837.69)</td><td align="left" valign="top">1912.17 (851.19)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Daily total fat (g)</td><td align="left" valign="top">78.27 (38.36)</td><td align="left" valign="top">72.29 (39.76)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Daily total sugars (g)</td><td align="left" valign="top">113.59 (65.37)</td><td align="left" valign="top">104.57 (68.14)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Daily total cholesterol (mg)</td><td align="left" valign="top">287.05 (193.07)</td><td align="left" valign="top">285.82 (201.17)</td><td align="left" valign="top">.80</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Times of dining out per 
week</td><td align="left" valign="top">3.43 (3.84)</td><td align="left" valign="top">2.60 (3.43)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top" colspan="4">Health behaviors</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Physical activity (MET<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup>-h/wk), mean (SD)</td><td align="left" valign="top">2.36 (3.69)</td><td align="left" valign="top">1.62 (2.81)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Smoking, n (%)</td><td align="left" valign="top">4200 (43.97)</td><td align="left" valign="top">1031 (48.91)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Drinking, n (%)</td><td align="left" valign="top">6758 (74.26)</td><td align="left" valign="top">1262 (67.52)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top" colspan="4">Self-reported chronic conditions, n (%)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Hypertension</td><td align="left" valign="top">2778 (29.10)</td><td align="left" valign="top">1188 (52.85)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Rheumatoid arthritis</td><td align="left" valign="top">2206 (23.12)</td><td align="left" valign="top">778 (34.55)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Myocardial infarction</td><td align="left" 
valign="top">283 (2.96)</td><td align="left" valign="top">152 (6.76)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Congestive heart failure</td><td align="left" valign="top">182 (1.91)</td><td align="left" valign="top">112 (4.99)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Coronary heart disease</td><td align="left" valign="top">261 (2.74)</td><td align="left" valign="top">145 (6.47)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Stroke</td><td align="left" valign="top">231 (2.42)</td><td align="left" valign="top">127 (5.64)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Liver disease</td><td align="left" valign="top">295 (3.09)</td><td align="left" valign="top">117 (5.19)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Weak/failing kidneys</td><td align="left" valign="top">199 (2.08)</td><td align="left" valign="top">64 (2.84)</td><td align="left" valign="top">.03</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Cancer</td><td align="left" valign="top">760 (7.95)</td><td align="left" valign="top">257 (11.40)</td><td align="left" valign="top">&#x003C;.001</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>MET: metabolic equivalent.</p></fn></table-wrap-foot></table-wrap><fig position="float" id="figure1"><label>Figure 
1.</label><caption><p>Flow diagram of participant selection. 2-h PG: 2-hour plasma glucose; A<sub>1c</sub>: hemoglobin A<sub>1c</sub>; FPG: fasting plasma glucose; NHANES: National Health and Nutrition Examination Survey.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v4i1e68260_fig01.png"/></fig><p>The performance of the AutoML model and traditional machine learning models is summarized in <xref ref-type="fig" rid="figure2">Figure 2</xref>. The AutoML model demonstrated superior performance compared to the 4 traditional machine learning models&#x2014;logistic regression, support vector machines, random forest, and XGBoost. The trained AutoML model achieved an AUC of 0.909 (95% CI 0.897-0.921) and an accuracy of 86.5% in the test set. The model demonstrated a sensitivity of 70.26%, specificity of 90.45%, PPV of 64.10%, and NPV of 92.61% for identifying undiagnosed diabetes from nondiabetes (<xref ref-type="table" rid="table3">Table 3</xref>). The model summary and details are provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Performance of AutoML model and custom machine learning models in detecting undiagnosed diabetes on the test set. 
AutoML: automated machine learning; XGBoost: extreme gradient boosting.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v4i1e68260_fig02.png"/></fig><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Confusion matrix for the classification of undiagnosed diabetes using the AutoML model.<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup></p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom"/><td align="left" valign="bottom" colspan="2">Predicted label, n (%)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">No diabetes</td><td align="left" valign="top">Undiagnosed diabetes</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="3">True label</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>No diabetes</td><td align="left" valign="top">2558 (73)</td><td align="left" valign="top">270 (8)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Undiagnosed diabetes</td><td align="left" valign="top">204 (6)</td><td align="left" valign="top">482 (14)</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>Note: The cutoff threshold was 0.248718, optimized for <italic>F</italic><sub>1</sub>-score that maximized the harmonic mean of precision and recall. The matrix shows the distribution of true labels against predicted labels. The cell values indicate the number of instances (absolute counts) and their corresponding percentages of the total in the test data. 
Sensitivity (482/686, 70.26%), specificity (2558/2828, 90.45%), positive predictive value (482/752, 64.10%), and negative predictive value (2558/2762, 92.61%) are derived from the matrix to assess model performance.</p></fn></table-wrap-foot></table-wrap><p>The top 5 features are age, waist circumference, daily total sugar intake, income, and BMI, together accounting for 50% of total model importance. Comorbidities, except hypertension, contributed minimally relative to demographic and behavioral factors. Excluding comorbidities (except hypertension) resulted in comparable model performance, with an AUC of 0.830 and an accuracy of 85.1%.</p><p>Additional analysis results are summarized in <xref ref-type="table" rid="table4">Table 4</xref>. The model using diabetes diagnosis criteria with a confirmatory test achieved training and testing accuracies of 98.0% and 89.7%, respectively, with a testing AUC of 0.823. However, precision (ie, PPV) and recall (ie, sensitivity) were suboptimal due to the small number of patients meeting the diabetes criteria with &#x2265;2 tests. The model demonstrated a sensitivity of 44.10%, specificity of 92.22%, PPV of 24.23%, and NPV of 96.69% for identifying undiagnosed diabetes from nondiabetes. 
The performance of the multiclass prediction model was poor, with an overall accuracy of 59.0% when using diagnosis criteria with &#x2265;1 test and 67.1% when using diagnosis criteria with &#x2265;2 tests.</p><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Additional models for alternative diabetes diagnosis criteria with the second confirmatory test and multiclass prediction including prediabetes.</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom"/><td align="left" valign="bottom" colspan="2">Undiagnosed diabetes (&#x2265;2 test) versus no diabetes<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="bottom" colspan="2">Undiagnosed diabetes (&#x2265;1 test) versus prediabetes versus normoglycemia</td><td align="left" valign="bottom" colspan="2">Undiagnosed diabetes (&#x2265;2 test) versus prediabetes versus normoglycemia</td></tr></thead><tbody><tr><td align="left" valign="top"/><td align="left" valign="top">Train</td><td align="left" valign="top">Test</td><td align="left" valign="top">Train</td><td align="left" valign="top">Test</td><td align="left" valign="top">Train</td><td align="left" valign="top">Test</td></tr><tr><td align="left" valign="top">Accuracy (%)</td><td align="left" valign="top">98.0</td><td align="left" valign="top">89.7</td><td align="left" valign="top">66.8</td><td align="left" valign="top">59.0</td><td align="left" valign="top">68.6</td><td align="left" valign="top">67.1</td></tr><tr><td align="left" valign="top">Overall AUC<sup><xref ref-type="table-fn" rid="table4fn2">b</xref></sup></td><td align="left" valign="top">0.993</td><td align="left" valign="top">0.823</td><td align="left" valign="top">0.627</td><td align="left" valign="top">0.557</td><td align="left" valign="top">0.588</td><td align="left" valign="top">0.557</td></tr><tr><td align="left" valign="top">AUC (normal versus rest)</td><td align="left" 
valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table4fn3">c</xref></sup></td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">0.502</td><td align="left" valign="top">0.445</td><td align="left" valign="top">0.316</td><td align="left" valign="top">0.307</td></tr><tr><td align="left" valign="top">AUC (prediabetes versus rest)</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">0.814</td><td align="left" valign="top">0.652</td><td align="left" valign="top">0.792</td><td align="left" valign="top">0.715</td></tr><tr><td align="left" valign="top">AUC (diabetes versus rest)</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">0.415</td><td align="left" valign="top">0.545</td><td align="left" valign="top">0.555</td><td align="left" valign="top">0.673</td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>&#x201C;No diabetes&#x201D; group includes normoglycemia and prediabetes.</p></fn><fn id="table4fn2"><p><sup>b</sup>AUC: area under the receiver operating characteristic curve.</p></fn><fn id="table4fn3"><p><sup>c</sup>Not applicable.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><p>To our knowledge, this study is the first to utilize the AutoML model for detecting undiagnosed diabetes in US adults. The best-performing model achieved an AUC of 0.91 and an accuracy of 86.5% in the test set. National surveillance shows that nearly half of those with undiagnosed diabetes have hypertension, lipid abnormalities, or cardiovascular and chronic kidney diseases [<xref ref-type="bibr" rid="ref45">45</xref>-<xref ref-type="bibr" rid="ref48">48</xref>]. Delayed diagnosis of diabetes hinders the opportunities for early intervention to slow the progression of dysglycemia and its comorbidities. 
Our model was trained and tested using a substantial and diverse dataset comprising nationally representative survey data. The model&#x2019;s high accuracy and applicability to the broader US population make it a promising tool for large-scale diabetes screening efforts.</p><p>The feature importance ranking of the best-performing model highlights waist circumference, BMI, and dietary variables as key predictors, underscoring their strong links to metabolic health, insulin resistance, and dietary habits that influence diabetes risk. Lifestyle factors such as drinking frequency and physical activity also emerged as significant contributors, whereas self-reported comorbidities played a smaller role once anthropometric and behavioral measures were included. These findings align with epidemiological studies and improve the model&#x2019;s interpretability, providing actionable insights to prioritize targeted interventions for modifiable risk factors [<xref ref-type="bibr" rid="ref49">49</xref>].</p><p>Machine learning has advanced clinical research but faces adoption barriers like data access, imbalances, and reliance on data science expertise for deployment [<xref ref-type="bibr" rid="ref50">50</xref>,<xref ref-type="bibr" rid="ref51">51</xref>]. AutoML reduces the need for machine learning expertise, enabling clinicians to use advanced technologies without programming skills and integrating them into research and clinical practice [<xref ref-type="bibr" rid="ref52">52</xref>-<xref ref-type="bibr" rid="ref55">55</xref>]. 
Despite its promise, few studies have explored the application of AutoML for diabetes diagnosis [<xref ref-type="bibr" rid="ref56">56</xref>].</p><p>Previous studies have compared traditional machine learning models with conventional statistical models for identifying undiagnosed diabetes, demonstrating that machine learning models outperform statistical models [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref30">30</xref>]. These studies reported AUC values between 0.73 and 0.81, consistent with the performance of traditional models in this study. However, the reported low PPVs highlighted the limited ability of these models to accurately identify undiagnosed diabetes cases [<xref ref-type="bibr" rid="ref30">30</xref>]. This study showed that the AutoML model outperformed traditional machine learning models in detecting undiagnosed diabetes. Similarly, one study has reported that the AutoML model outperformed both individual and ensemble models in identifying patients with diabetes using electronic medical records data [<xref ref-type="bibr" rid="ref57">57</xref>]. These findings suggest that AutoML provides a more accessible and efficient approach, eliminating the need for manual optimization while delivering superior performance.</p><p>Nonetheless, several issues with the AutoML model in diabetes screening should be noted. When applying more stringent diabetes diagnostic criteria, the accuracy reached 90%; however, precision and recall were low, likely due to the limited number of samples that met the &#x2265;2 test criteria. H2O AutoML provides significant advantages, including its built-in class balancing functionality, which automates the handling of moderate class imbalance without requiring external implementations. 
Using random sampling to upsample minority classes or downsample majority classes, it effectively manages datasets with moderate imbalance, such as the 4:1 ratio under the 1+ test criterion for diabetes diagnosis in this study. However, with the stricter 2+ test criterion, the imbalance ratio rose to 24:1, likely exceeding H2O AutoML&#x2019;s ability to mitigate the imbalance. This severe imbalance impacted the model&#x2019;s ability to accurately distinguish undiagnosed diabetes from nondiabetes in unseen data, reducing precision and recall. For such highly imbalanced datasets, combining AutoML with other data resampling methods, such as Synthetic Minority Over-sampling Technique, could better improve model performance [<xref ref-type="bibr" rid="ref58">58</xref>].</p><p>In addition, the model&#x2019;s performance in multiclass classification of no diabetes, prediabetes, and undiagnosed diabetes was notably poor. Prediabetes is an intermediate stage between normal glycemia and diabetes and is highly prevalent [<xref ref-type="bibr" rid="ref59">59</xref>]. Clinically, prediabetes and diabetes, as the continuum of dysglycemia, share many overlapping risk factors, such as insulin resistance and elevated glucose levels [<xref ref-type="bibr" rid="ref60">60</xref>], making their differentiation challenging. The subtle metabolic differences between these conditions may not have been adequately captured by the included self-reported data. Future efforts should focus on incorporating additional features and refining the model architecture to enhance accuracy and improve its ability to identify prediabetes.</p><p>A major strength of this model is its use of NHANES data, which is nationally representative of the US population, enhancing the generalizability of the findings. The model incorporates comprehensive self-reported data, including nutritional information, for predicting undiagnosed diabetes. 
The application of AutoML in this study represents one of the first uses of this approach in diabetes research, providing a foundation for further development and validation of similar models. However, the widespread adoption of our AutoML model in health care requires further development and validation. False positives can lead to unnecessary tests, higher costs, and patient anxiety, while false negatives may delay treatment and worsen outcomes. Using AutoML models in real-world clinical settings requires meticulous threshold optimization and validation to achieve an appropriate balance between precision and recall. These models should be rigorously evaluated for fairness and equity, ensuring that performance does not vary significantly across demographic groups [<xref ref-type="bibr" rid="ref61">61</xref>,<xref ref-type="bibr" rid="ref62">62</xref>]. Practical barriers to implementing AutoML in clinical practice also include integration with electronic health record systems and the lack of trust in &#x201C;black-box&#x201D; models due to their opacity [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref61">61</xref>-<xref ref-type="bibr" rid="ref63">63</xref>].</p><p>Several limitations should be noted. First, the definition of undiagnosed diabetes in the base model was based on a single elevated measurement, which may not fully capture the condition. However, the model still showed utility as a screening tool. Second, the model demonstrated poor performance in multiclass prediction, including prediabetes, indicating that additional feature refinement and model adjustments may be necessary to improve accuracy in 3-class predictions. Third, known diabetes diagnoses rely on self-reports, which may introduce potential recall bias and misclassification. This could lead to mislabeled cases and diminished model performance. 
Although most published public health studies still rely on self-reports&#x2014;hence making our study not an unusual one in using self-reports&#x2014;future studies should aim to validate self-reported data against medical records where feasible to minimize errors and increase reliability of the observations. Finally, the generalizability of this model may be limited to US populations and may not extend to non-US populations.</p><p>This study demonstrates the potential of AutoML in detecting undiagnosed diabetes using self-reported and easily accessible data. Although challenges remain in accurately classifying multiple categories, including prediabetes, the model shows promise as a tool for large-scale diabetes screening. Further refinement and validation are required to improve its applicability across diverse populations.</p></sec></body><back><ack><p>JL was supported by Beijing Educational Sciences planning project (CCHA241) and Sports Special Project of China Disabled Persons' Federation (2024 KFJS &#x0026; 001).</p></ack><fn-group><fn fn-type="con"><p>MJ and RA conceived and designed the study. JL and MJ contributed to data curation, formal analysis, funding acquisition, and investigation. MJ was responsible for resources and validation. Visualization was performed by JL and MJ. 
The original draft of the manuscript was written by JL and MJ, with critical review and editing provided by JL, FS, RA, and MJ.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">ADA</term><def><p>American Diabetes Association</p></def></def-item><def-item><term id="abb2">AUC</term><def><p>area under the receiver operating characteristic curve</p></def></def-item><def-item><term id="abb3">AutoML</term><def><p>automated machine learning</p></def></def-item><def-item><term id="abb4">CREMLS</term><def><p>Consolidated Reporting Guidelines for Prognostic and Diagnostic Machine Learning Models</p></def></def-item><def-item><term id="abb5">MET</term><def><p>metabolic equivalent</p></def></def-item><def-item><term id="abb6">NHANES</term><def><p>National Health and Nutrition Examination Survey</p></def></def-item><def-item><term id="abb7">NPV</term><def><p>negative predictive value</p></def></def-item><def-item><term id="abb8">PPV</term><def><p>positive predictive value</p></def></def-item><def-item><term id="abb9">XGBoost</term><def><p>extreme gradient boosting</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ahmad</surname><given-names>FB</given-names> </name><name name-style="western"><surname>Anderson</surname><given-names>RN</given-names> </name></person-group><article-title>The leading causes of death in the US for 2020</article-title><source>JAMA</source><year>2021</year><month>05</month><day>11</day><volume>325</volume><issue>18</issue><fpage>1829</fpage><lpage>1830</lpage><pub-id pub-id-type="doi">10.1001/jama.2021.5469</pub-id><pub-id pub-id-type="medline">33787821</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="web"><article-title>Statistics about 
diabetes</article-title><source>American Diabetes Association</source><access-date>2025-01-27</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://diabetes.org/about-diabetes/statistics/about-diabetes">https://diabetes.org/about-diabetes/statistics/about-diabetes</ext-link></comment></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="web"><article-title>Diabetes statistics</article-title><source>National Institute of Diabetes and Digestive and Kidney Diseases</source><access-date>2025-01-27</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.niddk.nih.gov/health-information/health-statistics/diabetes-statistics">https://www.niddk.nih.gov/health-information/health-statistics/diabetes-statistics</ext-link></comment></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Beagley</surname><given-names>J</given-names> </name><name name-style="western"><surname>Guariguata</surname><given-names>L</given-names> </name><name name-style="western"><surname>Weil</surname><given-names>C</given-names> </name><name name-style="western"><surname>Motala</surname><given-names>AA</given-names> </name></person-group><article-title>Global estimates of undiagnosed diabetes in adults</article-title><source>Diabetes Res Clin Pract</source><year>2014</year><month>02</month><volume>103</volume><issue>2</issue><fpage>150</fpage><lpage>160</lpage><pub-id pub-id-type="doi">10.1016/j.diabres.2013.11.001</pub-id><pub-id pub-id-type="medline">24300018</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fowler</surname><given-names>MJ</given-names> </name></person-group><article-title>Microvascular and macrovascular complications of diabetes</article-title><source>Clin 
Diabetes</source><year>2011</year><month>07</month><day>1</day><volume>29</volume><issue>3</issue><fpage>116</fpage><lpage>122</lpage><pub-id pub-id-type="doi">10.2337/diaclin.29.3.116</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Deshpande</surname><given-names>AD</given-names> </name><name name-style="western"><surname>Harris-Hayes</surname><given-names>M</given-names> </name><name name-style="western"><surname>Schootman</surname><given-names>M</given-names> </name></person-group><article-title>Epidemiology of diabetes and diabetes-related complications</article-title><source>Phys Ther</source><year>2008</year><month>11</month><volume>88</volume><issue>11</issue><fpage>1254</fpage><lpage>1264</lpage><pub-id pub-id-type="doi">10.2522/ptj.20080020</pub-id><pub-id pub-id-type="medline">18801858</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ahmad</surname><given-names>F</given-names> </name><name name-style="western"><surname>Joshi</surname><given-names>SH</given-names> </name></person-group><article-title>Self-care practices and their role in the control of diabetes: a narrative review</article-title><source>Cureus</source><year>2023</year><month>07</month><volume>15</volume><issue>7</issue><fpage>e41409</fpage><pub-id pub-id-type="doi">10.7759/cureus.41409</pub-id><pub-id pub-id-type="medline">37546053</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ali</surname><given-names>MK</given-names> </name><name name-style="western"><surname>McKeever Bullard</surname><given-names>K</given-names> </name><name name-style="western"><surname>Imperatore</surname><given-names>G</given-names> 
</name><etal/></person-group><article-title>Reach and use of diabetes prevention services in the United States, 2016-2017</article-title><source>JAMA Netw Open</source><year>2019</year><month>05</month><day>3</day><volume>2</volume><issue>5</issue><fpage>e193160</fpage><pub-id pub-id-type="doi">10.1001/jamanetworkopen.2019.3160</pub-id><pub-id pub-id-type="medline">31074808</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ali</surname><given-names>MK</given-names> </name><name name-style="western"><surname>Bullard</surname><given-names>KM</given-names> </name><name name-style="western"><surname>Gregg</surname><given-names>EW</given-names> </name><name name-style="western"><surname>Del Rio</surname><given-names>C</given-names> </name></person-group><article-title>A cascade of care for diabetes in the United States: visualizing the gaps</article-title><source>Ann Intern Med</source><year>2014</year><month>11</month><day>18</day><volume>161</volume><issue>10</issue><fpage>681</fpage><lpage>689</lpage><pub-id pub-id-type="doi">10.7326/M14-0019</pub-id><pub-id pub-id-type="medline">25402511</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><collab>Diabetes Prevention Program Research Group</collab><name name-style="western"><surname>Knowler</surname><given-names>WC</given-names> </name><name name-style="western"><surname>Fowler</surname><given-names>SE</given-names> </name><etal/></person-group><article-title>10-year follow-up of diabetes incidence and weight loss in the Diabetes Prevention Program Outcomes Study</article-title><source>Lancet</source><year>2009</year><month>11</month><day>14</day><volume>374</volume><issue>9702</issue><fpage>1677</fpage><lpage>1686</lpage><pub-id pub-id-type="doi">10.1016/S0140-6736(09)61457-4</pub-id><pub-id 
pub-id-type="medline">19878986</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><collab>US Preventive Services Task Force</collab><name name-style="western"><surname>Davidson</surname><given-names>KW</given-names> </name><name name-style="western"><surname>Barry</surname><given-names>MJ</given-names> </name><etal/></person-group><article-title>Screening for prediabetes and type 2 diabetes: US Preventive Services Task Force recommendation statement</article-title><source>JAMA</source><year>2021</year><month>08</month><day>24</day><volume>326</volume><issue>8</issue><fpage>736</fpage><lpage>743</lpage><pub-id pub-id-type="doi">10.1001/jama.2021.12531</pub-id><pub-id pub-id-type="medline">34427594</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><collab>American Diabetes Association Professional Practice Committee</collab></person-group><article-title>Classification and diagnosis of diabetes: standards of medical care in diabetes&#x2014;2022</article-title><source>Diabetes Care</source><year>2022</year><month>01</month><day>1</day><volume>45</volume><issue>Supplement_1</issue><fpage>S17</fpage><lpage>S38</lpage><pub-id pub-id-type="doi">10.2337/dc22-S002</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bullard</surname><given-names>KM</given-names> </name><name name-style="western"><surname>Ali</surname><given-names>MK</given-names> </name><name name-style="western"><surname>Imperatore</surname><given-names>G</given-names> </name><etal/></person-group><article-title>Receipt of glucose testing and performance of two US diabetes screening guidelines, 2007-2012</article-title><source>PLoS 
ONE</source><year>2015</year><volume>10</volume><issue>4</issue><fpage>e0125249</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0125249</pub-id><pub-id pub-id-type="medline">25928306</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ali</surname><given-names>MK</given-names> </name><name name-style="western"><surname>Imperatore</surname><given-names>G</given-names> </name><name name-style="western"><surname>Benoit</surname><given-names>SR</given-names> </name><etal/></person-group><article-title>Impact of changes in diabetes screening guidelines on testing eligibility and potential yield among adults without diagnosed diabetes in the United States</article-title><source>Diabetes Res Clin Pract</source><year>2023</year><month>03</month><volume>197</volume><fpage>110572</fpage><pub-id pub-id-type="doi">10.1016/j.diabres.2023.110572</pub-id><pub-id pub-id-type="medline">36775024</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Franciosi</surname><given-names>M</given-names> </name><name name-style="western"><surname>De Berardis</surname><given-names>G</given-names> </name><name name-style="western"><surname>Rossi</surname><given-names>MCE</given-names> </name><etal/></person-group><article-title>Use of the diabetes risk score for opportunistic screening of undiagnosed diabetes and impaired glucose tolerance: the IGLOO (Impaired Glucose Tolerance and Long-Term Outcomes Observational) study</article-title><source>Diabetes Care</source><year>2005</year><month>05</month><volume>28</volume><issue>5</issue><fpage>1187</fpage><lpage>1194</lpage><pub-id pub-id-type="doi">10.2337/diacare.28.5.1187</pub-id><pub-id pub-id-type="medline">15855587</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lee</surname><given-names>YH</given-names> </name><name name-style="western"><surname>Bang</surname><given-names>H</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>HC</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>HM</given-names> </name><name name-style="western"><surname>Park</surname><given-names>SW</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>DJ</given-names> </name></person-group><article-title>A simple screening score for diabetes for the Korean population: development, validation, and comparison with other scores</article-title><source>Diabetes Care</source><year>2012</year><month>08</month><volume>35</volume><issue>8</issue><fpage>1723</fpage><lpage>1730</lpage><pub-id pub-id-type="doi">10.2337/dc11-2347</pub-id><pub-id pub-id-type="medline">22688547</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bang</surname><given-names>H</given-names> </name><name name-style="western"><surname>Edwards</surname><given-names>AM</given-names> </name><name name-style="western"><surname>Bomback</surname><given-names>AS</given-names> </name><etal/></person-group><article-title>Development and validation of a patient self-assessment score for diabetes risk</article-title><source>Ann Intern Med</source><year>2009</year><month>12</month><day>1</day><volume>151</volume><issue>11</issue><fpage>775</fpage><lpage>783</lpage><pub-id pub-id-type="doi">10.7326/0003-4819-151-11-200912010-00005</pub-id><pub-id pub-id-type="medline">19949143</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Herman</surname><given-names>WH</given-names> </name><name name-style="western"><surname>Smith</surname><given-names>PJ</given-names> </name><name name-style="western"><surname>Thompson</surname><given-names>TJ</given-names> </name><name name-style="western"><surname>Engelgau</surname><given-names>MM</given-names> </name><name name-style="western"><surname>Aubert</surname><given-names>RE</given-names> </name></person-group><article-title>A new and simple questionnaire to identify people at increased risk for undiagnosed diabetes</article-title><source>Diabetes Care</source><year>1995</year><month>03</month><volume>18</volume><issue>3</issue><fpage>382</fpage><lpage>387</lpage><pub-id pub-id-type="doi">10.2337/diacare.18.3.382</pub-id><pub-id pub-id-type="medline">7555482</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Heikes</surname><given-names>KE</given-names> </name><name name-style="western"><surname>Eddy</surname><given-names>DM</given-names> </name><name name-style="western"><surname>Arondekar</surname><given-names>B</given-names> </name><name name-style="western"><surname>Schlessinger</surname><given-names>L</given-names> </name></person-group><article-title>Diabetes risk calculator: a simple tool for detecting undiagnosed diabetes and pre-diabetes</article-title><source>Diabetes Care</source><year>2008</year><month>05</month><volume>31</volume><issue>5</issue><fpage>1040</fpage><lpage>1045</lpage><pub-id pub-id-type="doi">10.2337/dc07-1150</pub-id><pub-id pub-id-type="medline">18070993</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rolka</surname><given-names>DB</given-names> </name><name name-style="western"><surname>Narayan</surname><given-names>KM</given-names> 
</name><name name-style="western"><surname>Thompson</surname><given-names>TJ</given-names> </name><etal/></person-group><article-title>Performance of recommended screening tests for undiagnosed diabetes and dysglycemia</article-title><source>Diabetes Care</source><year>2001</year><month>11</month><volume>24</volume><issue>11</issue><fpage>1899</fpage><lpage>1903</lpage><pub-id pub-id-type="doi">10.2337/diacare.24.11.1899</pub-id><pub-id pub-id-type="medline">11679454</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dhippayom</surname><given-names>T</given-names> </name><name name-style="western"><surname>Chaiyakunapruk</surname><given-names>N</given-names> </name><name name-style="western"><surname>Krass</surname><given-names>I</given-names> </name></person-group><article-title>How diabetes risk assessment tools are implemented in practice: a systematic review</article-title><source>Diabetes Res Clin Pract</source><year>2014</year><month>06</month><volume>104</volume><issue>3</issue><fpage>329</fpage><lpage>342</lpage><pub-id pub-id-type="doi">10.1016/j.diabres.2014.01.008</pub-id><pub-id pub-id-type="medline">24485859</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Maniruzzaman</surname><given-names>M</given-names> </name><name name-style="western"><surname>Rahman</surname><given-names>MJ</given-names> </name><name name-style="western"><surname>Al-MehediHasan</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Accurate diabetes risk stratification using machine learning: role of missing value and outliers</article-title><source>J Med Syst</source><year>2018</year><month>04</month><day>10</day><volume>42</volume><issue>5</issue><fpage>92</fpage><pub-id 
pub-id-type="doi">10.1007/s10916-018-0940-7</pub-id><pub-id pub-id-type="medline">29637403</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hasan</surname><given-names>MK</given-names> </name><name name-style="western"><surname>Alam</surname><given-names>MA</given-names> </name><name name-style="western"><surname>Das</surname><given-names>D</given-names> </name><name name-style="western"><surname>Hossain</surname><given-names>E</given-names> </name><name name-style="western"><surname>Hasan</surname><given-names>M</given-names> </name></person-group><article-title>Diabetes prediction using ensembling of different machine learning classifiers</article-title><source>IEEE Access</source><year>2020</year><volume>8</volume><fpage>76516</fpage><lpage>76531</lpage><pub-id pub-id-type="doi">10.1109/ACCESS.2020.2989857</pub-id><pub-id pub-id-type="medline">34812373</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sisodia</surname><given-names>D</given-names> </name><name name-style="western"><surname>Sisodia</surname><given-names>DS</given-names> </name></person-group><article-title>Prediction of diabetes using classification algorithms</article-title><source>Procedia Comput Sci</source><year>2018</year><volume>132</volume><fpage>1578</fpage><lpage>1585</lpage><pub-id pub-id-type="doi">10.1016/j.procs.2018.05.122</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Olivera</surname><given-names>AR</given-names> </name><name name-style="western"><surname>Roesler</surname><given-names>V</given-names> </name><name name-style="western"><surname>Iochpe</surname><given-names>C</given-names> 
</name><etal/></person-group><article-title>Comparison of machine-learning algorithms to build a predictive model for detecting undiagnosed diabetes - ELSA-Brasil: accuracy study</article-title><source>Sao Paulo Med J</source><year>2017</year><volume>135</volume><issue>3</issue><fpage>234</fpage><lpage>246</lpage><pub-id pub-id-type="doi">10.1590/1516-3180.2016.0309010217</pub-id><pub-id pub-id-type="medline">28746659</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ryu</surname><given-names>KS</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>SW</given-names> </name><name name-style="western"><surname>Batbaatar</surname><given-names>E</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>JW</given-names> </name><name name-style="western"><surname>Choi</surname><given-names>KS</given-names> </name><name name-style="western"><surname>Cha</surname><given-names>HS</given-names> </name></person-group><article-title>A deep learning model for estimation of patients with undiagnosed diabetes</article-title><source>Appl Sci (Basel)</source><year>2020</year><volume>10</volume><issue>1</issue><fpage>421</fpage><pub-id pub-id-type="doi">10.3390/app10010421</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Choi</surname><given-names>SG</given-names> </name><name name-style="western"><surname>Oh</surname><given-names>M</given-names> </name><name name-style="western"><surname>Park</surname><given-names>DH</given-names> </name><etal/></person-group><article-title>Comparisons of the prediction models for undiagnosed diabetes between machine learning versus traditional statistical methods</article-title><source>Sci 
Rep</source><year>2023</year><month>08</month><day>11</day><volume>13</volume><issue>1</issue><fpage>13101</fpage><pub-id pub-id-type="doi">10.1038/s41598-023-40170-0</pub-id><pub-id pub-id-type="medline">37567907</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kavakiotis</surname><given-names>I</given-names> </name><name name-style="western"><surname>Tsave</surname><given-names>O</given-names> </name><name name-style="western"><surname>Salifoglou</surname><given-names>A</given-names> </name><name name-style="western"><surname>Maglaveras</surname><given-names>N</given-names> </name><name name-style="western"><surname>Vlahavas</surname><given-names>I</given-names> </name><name name-style="western"><surname>Chouvarda</surname><given-names>I</given-names> </name></person-group><article-title>Machine learning and data mining methods in diabetes research</article-title><source>Comput Struct Biotechnol J</source><year>2017</year><volume>15</volume><fpage>104</fpage><lpage>116</lpage><pub-id pub-id-type="doi">10.1016/j.csbj.2016.12.005</pub-id><pub-id pub-id-type="medline">28138367</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yu</surname><given-names>W</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>T</given-names> </name><name name-style="western"><surname>Valdez</surname><given-names>R</given-names> </name><name name-style="western"><surname>Gwinn</surname><given-names>M</given-names> </name><name name-style="western"><surname>Khoury</surname><given-names>MJ</given-names> </name></person-group><article-title>Application of support vector machine modeling for prediction of common diseases: the case of diabetes and pre-diabetes</article-title><source>BMC Med Inform Decis 
Mak</source><year>2010</year><month>03</month><day>22</day><volume>10</volume><issue>1</issue><fpage>16</fpage><pub-id pub-id-type="doi">10.1186/1472-6947-10-16</pub-id><pub-id pub-id-type="medline">20307319</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cichosz</surname><given-names>SL</given-names> </name><name name-style="western"><surname>Bender</surname><given-names>C</given-names> </name><name name-style="western"><surname>Hejlesen</surname><given-names>O</given-names> </name></person-group><article-title>A comparative analysis of machine learning models for the detection of undiagnosed diabetes patients</article-title><source>Diabetology</source><year>2024</year><volume>5</volume><issue>1</issue><fpage>1</fpage><lpage>11</lpage><pub-id pub-id-type="doi">10.3390/diabetology5010001</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Waring</surname><given-names>J</given-names> </name><name name-style="western"><surname>Lindvall</surname><given-names>C</given-names> </name><name name-style="western"><surname>Umeton</surname><given-names>R</given-names> </name></person-group><article-title>Automated machine learning: review of the state-of-the-art and opportunities for healthcare</article-title><source>Artif Intell Med</source><year>2020</year><month>04</month><volume>104</volume><fpage>101822</fpage><pub-id pub-id-type="doi">10.1016/j.artmed.2020.101822</pub-id><pub-id pub-id-type="medline">32499001</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Scott</surname><given-names>IA</given-names> </name><name name-style="western"><surname>De Guzman</surname><given-names>KR</given-names> 
</name><name name-style="western"><surname>Falconer</surname><given-names>N</given-names> </name><etal/></person-group><article-title>Evaluating automated machine learning platforms for use in healthcare</article-title><source>JAMIA Open</source><year>2024</year><month>07</month><volume>7</volume><issue>2</issue><fpage>ooae031</fpage><pub-id pub-id-type="doi">10.1093/jamiaopen/ooae031</pub-id><pub-id pub-id-type="medline">38863963</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Luo</surname><given-names>G</given-names> </name></person-group><article-title>A review of automatic selection methods for machine learning algorithms and hyper-parameter values</article-title><source>Netw Model Anal Health Inform Bioinforma</source><year>2016</year><month>12</month><volume>5</volume><issue>1</issue><fpage>18</fpage><pub-id pub-id-type="doi">10.1007/s13721-016-0125-6</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>TC</given-names> </name><name name-style="western"><surname>Clark</surname><given-names>J</given-names> </name><name name-style="western"><surname>Riddles</surname><given-names>MK</given-names> </name><name name-style="western"><surname>Mohadjer</surname><given-names>LK</given-names> </name><name name-style="western"><surname>Fakhouri</surname><given-names>THI</given-names> </name></person-group><article-title>National Health and Nutrition Examination Survey, 2015-2018: sample design and estimation procedures</article-title><source>Vital Health Stat 2</source><year>2020</year><month>04</month><issue>184</issue><fpage>1</fpage><lpage>35</lpage><pub-id pub-id-type="medline">33663649</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mirel</surname><given-names>LB</given-names> </name><name name-style="western"><surname>Mohadjer</surname><given-names>LK</given-names> </name><name name-style="western"><surname>Dohrmann</surname><given-names>SM</given-names> </name><etal/></person-group><article-title>National Health and Nutrition Examination Survey: estimation procedures, 2007-2010</article-title><source>Vital Health Stat 2</source><year>2013</year><month>08</month><volume>2</volume><issue>159</issue><fpage>1</fpage><lpage>17</lpage><pub-id pub-id-type="medline">25093338</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>El Emam</surname><given-names>K</given-names> </name><name name-style="western"><surname>Leung</surname><given-names>TI</given-names> </name><name name-style="western"><surname>Malin</surname><given-names>B</given-names> </name><name name-style="western"><surname>Klement</surname><given-names>W</given-names> </name><name name-style="western"><surname>Eysenbach</surname><given-names>G</given-names> </name></person-group><article-title>Consolidated Reporting Guidelines for Prognostic and Diagnostic Machine Learning Models (CREMLS)</article-title><source>J Med Internet Res</source><year>2024</year><month>05</month><day>2</day><volume>26</volume><fpage>e52508</fpage><pub-id pub-id-type="doi">10.2196/52508</pub-id><pub-id pub-id-type="medline">38696776</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Klement</surname><given-names>W</given-names> </name><name name-style="western"><surname>El Emam</surname><given-names>K</given-names> </name></person-group><article-title>Consolidated reporting guidelines for prognostic and diagnostic 
machine learning modeling studies: development and validation</article-title><source>J Med Internet Res</source><year>2023</year><month>08</month><day>31</day><volume>25</volume><fpage>e48763</fpage><pub-id pub-id-type="doi">10.2196/48763</pub-id><pub-id pub-id-type="medline">37651179</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>ElSayed</surname><given-names>NA</given-names> </name><name name-style="western"><surname>Aleppo</surname><given-names>G</given-names> </name><name name-style="western"><surname>Bannuru</surname><given-names>RR</given-names> </name><etal/></person-group><article-title>Diagnosis and classification of diabetes: standards of care in diabetes&#x2014;2024</article-title><source>Diabetes Care</source><year>2024</year><month>01</month><day>1</day><volume>47</volume><issue>Supplement_1</issue><fpage>S20</fpage><lpage>S42</lpage><pub-id pub-id-type="doi">10.2337/dc24-S002</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="web"><article-title>NHANES 2005-2006 data documentation, codebook, and frequencies</article-title><source>Centers for Disease Control and Prevention</source><access-date>2025-01-27</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://wwwn.cdc.gov/Nchs/Nhanes/2005-2006/PAQIAF_D.htm">https://wwwn.cdc.gov/Nchs/Nhanes/2005-2006/PAQIAF_D.htm</ext-link></comment></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>LeDell</surname><given-names>E</given-names> </name><name name-style="western"><surname>Poirier</surname><given-names>S</given-names> </name></person-group><article-title>H2O AutoML: scalable automatic machine 
learning</article-title><year>2020</year><month>07</month><day>18</day><access-date>2025-01-27</access-date><conf-name>7th ICML Workshop on Automated Machine Learning</conf-name><comment><ext-link ext-link-type="uri" xlink:href="https://www.automl.org/wp-content/uploads/2020/07/AutoML_2020_paper_61.pdf">https://www.automl.org/wp-content/uploads/2020/07/AutoML_2020_paper_61.pdf</ext-link></comment></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chawla</surname><given-names>NV</given-names> </name><name name-style="western"><surname>Bowyer</surname><given-names>KW</given-names> </name><name name-style="western"><surname>Hall</surname><given-names>LO</given-names> </name><name name-style="western"><surname>Kegelmeyer</surname><given-names>WP</given-names> </name></person-group><article-title>SMOTE: Synthetic Minority Over-Sampling Technique</article-title><source>J Artif Intell Res</source><year>2002</year><volume>16</volume><fpage>321</fpage><lpage>357</lpage><pub-id pub-id-type="doi">10.1613/jair.953</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Blagus</surname><given-names>R</given-names> </name><name name-style="western"><surname>Lusa</surname><given-names>L</given-names> </name></person-group><article-title>SMOTE for high-dimensional class-imbalanced data</article-title><source>BMC Bioinformatics</source><year>2013</year><month>03</month><day>22</day><volume>14</volume><fpage>106</fpage><pub-id pub-id-type="doi">10.1186/1471-2105-14-106</pub-id><pub-id pub-id-type="medline">23522326</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Samson</surname><given-names>SL</given-names> </name><name 
name-style="western"><surname>Vellanki</surname><given-names>P</given-names> </name><name name-style="western"><surname>Blonde</surname><given-names>L</given-names> </name><etal/></person-group><article-title>American Association of Clinical Endocrinology consensus statement: comprehensive type 2 diabetes management algorithm 2023 update</article-title><source>Endocr Pract</source><year>2023</year><month>05</month><volume>29</volume><issue>5</issue><fpage>305</fpage><lpage>340</lpage><pub-id pub-id-type="doi">10.1016/j.eprac.2023.02.001</pub-id><pub-id pub-id-type="medline">37150579</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="web"><article-title>Code of federal regulations</article-title><source>US Department of Health and Human Services</source><access-date>2024-09-30</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.ecfr.gov/current/title-45/subtitle-A/subchapter-A/part-46/subpart-A/section-46.104">https://www.ecfr.gov/current/title-45/subtitle-A/subchapter-A/part-46/subpart-A/section-46.104</ext-link></comment></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Plantinga</surname><given-names>LC</given-names> </name><name name-style="western"><surname>Crews</surname><given-names>DC</given-names> </name><name name-style="western"><surname>Coresh</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Prevalence of chronic kidney disease in US adults with undiagnosed diabetes or prediabetes</article-title><source>Clin J Am Soc Nephrol</source><year>2010</year><month>04</month><volume>5</volume><issue>4</issue><fpage>673</fpage><lpage>682</lpage><pub-id pub-id-type="doi">10.2215/CJN.07891109</pub-id><pub-id pub-id-type="medline">20338960</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Casagrande</surname><given-names>SS</given-names> </name><name name-style="western"><surname>Cowie</surname><given-names>CC</given-names> </name><name name-style="western"><surname>Fradkin</surname><given-names>JE</given-names> </name></person-group><article-title>Utility of the U.S. Preventive Services Task Force criteria for diabetes screening</article-title><source>Am J Prev Med</source><year>2013</year><month>08</month><volume>45</volume><issue>2</issue><fpage>167</fpage><lpage>174</lpage><pub-id pub-id-type="doi">10.1016/j.amepre.2013.02.026</pub-id><pub-id pub-id-type="medline">23867023</pub-id></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Young</surname><given-names>TK</given-names> </name><name name-style="western"><surname>Mustard</surname><given-names>CA</given-names> </name></person-group><article-title>Undiagnosed diabetes: does it matter?</article-title><source>CMAJ</source><year>2001</year><month>01</month><day>9</day><volume>164</volume><issue>1</issue><fpage>24</fpage><lpage>28</lpage><pub-id pub-id-type="medline">11202663</pub-id></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tenenbaum</surname><given-names>A</given-names> </name><name name-style="western"><surname>Motro</surname><given-names>M</given-names> </name><name name-style="western"><surname>Fisman</surname><given-names>EZ</given-names> </name><etal/></person-group><article-title>Clinical impact of borderline and undiagnosed diabetes mellitus in patients with coronary artery disease</article-title><source>Am J Cardiol</source><year>2000</year><month>12</month><day>15</day><volume>86</volume><issue>12</issue><fpage>1363</fpage><lpage>1366</lpage><pub-id 
pub-id-type="doi">10.1016/s0002-9149(00)01244-3</pub-id><pub-id pub-id-type="medline">11113414</pub-id></nlm-citation></ref><ref id="ref49"><label>49</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>L</given-names> </name><name name-style="western"><surname>Li</surname><given-names>X</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>Z</given-names> </name><etal/></person-group><article-title>Trends in prevalence of diabetes and control of risk factors in diabetes among US adults, 1999-2018</article-title><source>JAMA</source><year>2021</year><month>06</month><day>25</day><volume>326</volume><issue>8</issue><fpage>1</fpage><lpage>13</lpage><pub-id pub-id-type="doi">10.1001/jama.2021.9883</pub-id><pub-id pub-id-type="medline">34170288</pub-id></nlm-citation></ref><ref id="ref50"><label>50</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Khan</surname><given-names>B</given-names> </name><name name-style="western"><surname>Fatima</surname><given-names>H</given-names> </name><name name-style="western"><surname>Qureshi</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Drawbacks of artificial intelligence and their potential solutions in the healthcare sector</article-title><source>Biomed Mater Devices</source><year>2023</year><month>02</month><day>8</day><volume>1</volume><issue>2</issue><fpage>731</fpage><lpage>738</lpage><pub-id pub-id-type="doi">10.1007/s44174-023-00063-2</pub-id><pub-id pub-id-type="medline">36785697</pub-id></nlm-citation></ref><ref id="ref51"><label>51</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Habehh</surname><given-names>H</given-names> </name><name name-style="western"><surname>Gohel</surname><given-names>S</given-names> 
</name></person-group><article-title>Machine learning in healthcare</article-title><source>Curr Genomics</source><year>2021</year><month>12</month><day>16</day><volume>22</volume><issue>4</issue><fpage>291</fpage><lpage>300</lpage><pub-id pub-id-type="doi">10.2174/1389202922666210705124359</pub-id><pub-id pub-id-type="medline">35273459</pub-id></nlm-citation></ref><ref id="ref52"><label>52</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Romero</surname><given-names>RAA</given-names> </name><name name-style="western"><surname>Deypalan</surname><given-names>MNY</given-names> </name><name name-style="western"><surname>Mehrotra</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Benchmarking AutoML frameworks for disease prediction using medical claims</article-title><source>BioData Min</source><year>2022</year><month>07</month><day>26</day><volume>15</volume><issue>1</issue><fpage>15</fpage><pub-id pub-id-type="doi">10.1186/s13040-022-00300-2</pub-id><pub-id pub-id-type="medline">35883154</pub-id></nlm-citation></ref><ref id="ref53"><label>53</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rashidi</surname><given-names>HH</given-names> </name><name name-style="western"><surname>Tran</surname><given-names>N</given-names> </name><name name-style="western"><surname>Albahra</surname><given-names>S</given-names> </name><name name-style="western"><surname>Dang</surname><given-names>LT</given-names> </name></person-group><article-title>Machine learning in health care and laboratory medicine: general overview of supervised learning and Auto-ML</article-title><source>Int J Lab Hematol</source><year>2021</year><month>07</month><volume>43 Suppl 1</volume><fpage>15</fpage><lpage>22</lpage><pub-id pub-id-type="doi">10.1111/ijlh.13537</pub-id><pub-id 
pub-id-type="medline">34288435</pub-id></nlm-citation></ref><ref id="ref54"><label>54</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Raj</surname><given-names>R</given-names> </name><name name-style="western"><surname>Kannath</surname><given-names>SK</given-names> </name><name name-style="western"><surname>Mathew</surname><given-names>J</given-names> </name><name name-style="western"><surname>Sylaja</surname><given-names>PN</given-names> </name></person-group><article-title>AutoML accurately predicts endovascular mechanical thrombectomy in acute large vessel ischemic stroke</article-title><source>Front Neurol</source><year>2023</year><volume>14</volume><fpage>1259958</fpage><pub-id pub-id-type="doi">10.3389/fneur.2023.1259958</pub-id><pub-id pub-id-type="medline">37840939</pub-id></nlm-citation></ref><ref id="ref55"><label>55</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yang</surname><given-names>J</given-names> </name><name name-style="western"><surname>Shi</surname><given-names>R</given-names> </name><name name-style="western"><surname>Wei</surname><given-names>D</given-names> </name><etal/></person-group><article-title>MedMNIST v2&#x2014;a large-scale lightweight benchmark for 2D and 3D biomedical image classification</article-title><source>Sci Data</source><year>2023</year><month>01</month><day>19</day><volume>10</volume><issue>1</issue><fpage>41</fpage><pub-id pub-id-type="doi">10.1038/s41597-022-01721-8</pub-id><pub-id pub-id-type="medline">36658144</pub-id></nlm-citation></ref><ref id="ref56"><label>56</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhuhadar</surname><given-names>LP</given-names> </name><name name-style="western"><surname>Lytras</surname><given-names>MD</given-names> </name></person-group><article-title>The application 
of AutoML techniques in diabetes diagnosis: current approaches, performance, and future directions</article-title><source>Sustainability</source><year>2023</year><volume>15</volume><issue>18</issue><fpage>13484</fpage><pub-id pub-id-type="doi">10.3390/su151813484</pub-id></nlm-citation></ref><ref id="ref57"><label>57</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mohsen</surname><given-names>F</given-names> </name><name name-style="western"><surname>Biswas</surname><given-names>MR</given-names> </name><name name-style="western"><surname>Ali</surname><given-names>H</given-names> </name><name name-style="western"><surname>Alam</surname><given-names>T</given-names> </name><name name-style="western"><surname>Househ</surname><given-names>M</given-names> </name><name name-style="western"><surname>Shah</surname><given-names>Z</given-names> </name></person-group><article-title>Customized and automated machine learning-based models for diabetes type 2 classification</article-title><source>Stud Health Technol Inform</source><year>2022</year><month>06</month><day>29</day><volume>295</volume><fpage>517</fpage><lpage>520</lpage><pub-id pub-id-type="doi">10.3233/SHTI220779</pub-id><pub-id pub-id-type="medline">35773925</pub-id></nlm-citation></ref><ref id="ref58"><label>58</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Khushi</surname><given-names>M</given-names> </name><name name-style="western"><surname>Shaukat</surname><given-names>K</given-names> </name><name name-style="western"><surname>Alam</surname><given-names>TM</given-names> </name><etal/></person-group><article-title>A comparative performance analysis of data resampling methods on imbalance medical data</article-title><source>IEEE Access</source><year>2021</year><volume>9</volume><fpage>109960</fpage><lpage>109975</lpage><pub-id 
pub-id-type="doi">10.1109/ACCESS.2021.3102399</pub-id></nlm-citation></ref><ref id="ref59"><label>59</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Echouffo-Tcheugui</surname><given-names>JB</given-names> </name><name name-style="western"><surname>Selvin</surname><given-names>E</given-names> </name></person-group><article-title>Prediabetes and what it means: the epidemiological evidence</article-title><source>Annu Rev Public Health</source><year>2021</year><month>04</month><day>1</day><volume>42</volume><fpage>59</fpage><lpage>77</lpage><pub-id pub-id-type="doi">10.1146/annurev-publhealth-090419-102644</pub-id><pub-id pub-id-type="medline">33355476</pub-id></nlm-citation></ref><ref id="ref60"><label>60</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Skyler</surname><given-names>JS</given-names> </name><name name-style="western"><surname>Bakris</surname><given-names>GL</given-names> </name><name name-style="western"><surname>Bonifacio</surname><given-names>E</given-names> </name><etal/></person-group><article-title>Differentiation of diabetes by pathophysiology, natural history, and prognosis</article-title><source>Diabetes</source><year>2017</year><month>02</month><volume>66</volume><issue>2</issue><fpage>241</fpage><lpage>255</lpage><pub-id pub-id-type="doi">10.2337/db16-0806</pub-id><pub-id pub-id-type="medline">27980006</pub-id></nlm-citation></ref><ref id="ref61"><label>61</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Huguet</surname><given-names>N</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>J</given-names> </name><name name-style="western"><surname>Parikh</surname><given-names>RB</given-names> </name><etal/></person-group><article-title>Applying machine learning techniques to implementation 
science</article-title><source>Online J Public Health Inform</source><year>2024</year><month>04</month><day>22</day><volume>16</volume><fpage>e50201</fpage><pub-id pub-id-type="doi">10.2196/50201</pub-id><pub-id pub-id-type="medline">38648094</pub-id></nlm-citation></ref><ref id="ref62"><label>62</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sarker</surname><given-names>IH</given-names> </name></person-group><article-title>Machine learning: algorithms, real-world applications and research directions</article-title><source>SN Comput Sci</source><year>2021</year><volume>2</volume><issue>3</issue><fpage>160</fpage><pub-id pub-id-type="doi">10.1007/s42979-021-00592-x</pub-id><pub-id pub-id-type="medline">33778771</pub-id></nlm-citation></ref><ref id="ref63"><label>63</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shick</surname><given-names>AA</given-names> </name><name name-style="western"><surname>Webber</surname><given-names>CM</given-names> </name><name name-style="western"><surname>Kiarashi</surname><given-names>N</given-names> </name><etal/></person-group><article-title>Transparency of artificial intelligence/machine learning-enabled medical devices</article-title><source>NPJ Digit Med</source><year>2024</year><month>01</month><day>26</day><volume>7</volume><issue>1</issue><fpage>21</fpage><pub-id pub-id-type="doi">10.1038/s41746-023-00992-8</pub-id><pub-id pub-id-type="medline">38273098</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Model summary for stacked ensemble.</p><media xlink:href="ai_v4i1e68260_app1.docx" xlink:title="DOCX File, 16 KB"/></supplementary-material></app-group></back></article>