<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR AI</journal-id><journal-id journal-id-type="publisher-id">ai</journal-id><journal-id journal-id-type="index">41</journal-id><journal-title>JMIR AI</journal-title><abbrev-journal-title>JMIR AI</abbrev-journal-title><issn pub-type="epub">2817-1705</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v4i1e62985</article-id><article-id pub-id-type="doi">10.2196/62985</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Limitations of Binary Classification for Long-Horizon Diagnosis Prediction and Advantages of a Discrete-Time Time-to-Event Approach: Empirical Analysis</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Loh</surname><given-names>De Rong</given-names></name><degrees>BSc</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Hill</surname><given-names>Elliot D</given-names></name><degrees>MS</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Liu</surname><given-names>Nan</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Dawson</surname><given-names>Geraldine</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Engelhard</surname><given-names>Matthew M</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib></contrib-group><aff id="aff1"><institution>Duke-NUS Medical School</institution><addr-line>8 College Road</addr-line><addr-line>Singapore</addr-line><country>Singapore</country></aff><aff id="aff2"><institution>Department of Biostatistics and Bioinformatics, Duke University School of Medicine</institution><addr-line>Durham</addr-line><addr-line>NC</addr-line><country>United States</country></aff><aff id="aff3"><institution>Department of Psychiatry and Behavioral Sciences, Duke University School of Medicine</institution><addr-line>Durham</addr-line><addr-line>NC</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Malin</surname><given-names>Bradley</given-names></name></contrib><contrib contrib-type="editor"><name name-style="western"><surname>Emam</surname><given-names>Khaled El</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Aria</surname><given-names>Mehrad</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Sengupta</surname><given-names>Saurav</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Ruan</surname><given-names>Xiaoyang</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to De Rong Loh, BSc, Duke-NUS Medical School, 8 College Road, Singapore, 169857, Singapore, 65 97505085; <email>derong@u.duke.nus.edu</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date 
pub-type="epub"><day>27</day><month>3</month><year>2025</year></pub-date><volume>4</volume><elocation-id>e62985</elocation-id><history><date date-type="received"><day>07</day><month>06</month><year>2024</year></date><date date-type="rev-recd"><day>23</day><month>02</month><year>2025</year></date><date date-type="accepted"><day>23</day><month>02</month><year>2025</year></date></history><copyright-statement>&#x00A9; De Rong Loh, Elliot D Hill, Nan Liu, Geraldine Dawson, Matthew M Engelhard. Originally published in JMIR AI (<ext-link ext-link-type="uri" xlink:href="https://ai.jmir.org">https://ai.jmir.org</ext-link>), 27.3.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR AI, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://www.ai.jmir.org/">https://www.ai.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://ai.jmir.org/2025/1/e62985"/><abstract><sec><title>Background</title><p>A major challenge in using electronic health records (EHR) is the inconsistency of patient follow-up, resulting in right-censored outcomes. This becomes particularly problematic in long-horizon event predictions, such as autism and attention-deficit/hyperactivity disorder (ADHD) diagnoses, where a significant number of patients are lost to follow-up before the outcome can be observed. 
Consequently, fully supervised methods such as binary classification (BC), which are trained to predict observed diagnoses, are substantially affected by the probability of sufficient follow-up, leading to biased results.</p></sec><sec><title>Objective</title><p>This empirical analysis aims to characterize BC&#x2019;s inherent limitations for long-horizon diagnosis prediction from EHR and to quantify the benefits of a specific time-to-event (TTE) approach, the discrete-time neural network (DTNN).</p></sec><sec sec-type="methods"><title>Methods</title><p>Records within the Duke University Health System EHR were analyzed, extracting features such as <italic>ICD-10</italic> (<italic>International Classification of Diseases, Tenth Revision</italic>) diagnosis codes, medications, laboratories, and procedures. We compared a DTNN to 3 BC approaches and a deep Cox proportional hazards model across 4 clinical conditions to examine distributional patterns across various subgroups. Time-varying area under the receiver operating characteristic curve (AUC<sub>t</sub>) and time-varying average precision (AP<sub>t</sub>) were our primary evaluation metrics.</p></sec><sec sec-type="results"><title>Results</title><p>TTE models consistently had comparable or higher AUC<sub>t</sub> and AP<sub>t</sub> than BC for all conditions. 
At clinically relevant operating time points, the area under the receiver operating characteristic curve (AUC) values for DTNN<sub>YOB&#x2264;2020</sub> (year-of-birth) and DCPH<sub>YOB&#x2264;2020</sub> (deep Cox proportional hazards) were 0.70 (95% CI 0.66&#x2010;0.77) and 0.72 (95% CI 0.66&#x2010;0.78) at <italic>t</italic>=5 for autism, 0.72 (95% CI 0.65&#x2010;0.76) and 0.68 (95% CI 0.62&#x2010;0.74) at <italic>t</italic>=7 for ADHD, 0.72 (95% CI 0.70&#x2010;0.75) and 0.71 (95% CI 0.69&#x2010;0.74) at <italic>t</italic>=1 for recurrent otitis media, and 0.74 (95% CI 0.68&#x2010;0.82) and 0.71 (95% CI 0.63&#x2010;0.77) at <italic>t</italic>=1 for food allergy, compared to 0.6 (95% CI 0.55&#x2010;0.66), 0.47 (95% CI 0.40&#x2010;0.54), 0.73 (95% CI 0.70&#x2010;0.75), and 0.77 (95% CI 0.71&#x2010;0.82) for BC<sub>YOB&#x2264;2020</sub>, respectively. The probabilities predicted by BC models were positively correlated with censoring times, particularly for autism and ADHD prediction. Filtering strategies based on YOB or length of follow-up only partially corrected these biases. In subgroup analyses, only DTNN predicted diagnosis probabilities that accurately reflected actual clinical prevalence and temporal trends.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>BC models substantially underpredicted diagnosis likelihood and inappropriately assigned lower probability scores to individuals with earlier censoring. Common filtering strategies did not adequately address this limitation. TTE approaches, particularly DTNN, effectively mitigated bias from the censoring distribution, resulting in superior discrimination and calibration performance and more accurate prediction of clinical prevalence. Machine learning practitioners should recognize the limitations of BC for long-horizon diagnosis prediction and adopt TTE approaches. 
The DTNN in particular is well-suited to mitigate the effects of right-censoring and maximize prediction performance in this setting.</p></sec></abstract><kwd-group><kwd>machine learning</kwd><kwd>artificial intelligence</kwd><kwd>deep learning</kwd><kwd>predictive models</kwd><kwd>practical models</kwd><kwd>early detection</kwd><kwd>electronic health records</kwd><kwd>right-censoring</kwd><kwd>survival analysis</kwd><kwd>distributional shifts</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Electronic health records (EHR) are a rich source of data that can be used to develop effective clinical prediction models to improve patient care [<xref ref-type="bibr" rid="ref1">1</xref>]. However, a major challenge is that patients have inconsistent follow-ups, leading to right-censored outcomes, and follow-up length typically depends on observed covariates. This challenge is exacerbated in long-horizon event prediction, such as prediction of an autism and attention-deficit/hyperactivity disorder (ADHD) diagnosis early in life, because many patients are lost to follow-up before the outcome can be observed. Consequently, the probability of observing a diagnosis depends not only on the probability of diagnosis but also on the probability of sufficient follow-up (ie, the probability that diagnosis occurs before censoring). As a result, binary classification (BC) models trained to predict observed diagnoses are substantially affected by the probability of sufficient follow-up unless filtering strategies are carefully applied [<xref ref-type="bibr" rid="ref2">2</xref>].</p><p>A common filtering strategy to mitigate this effect is to exclude all individuals with insufficient follow-up. However, this is not feasible for many long-term prediction tasks. 
For example, sufficient follow-up for ADHD would extend into adolescence and adulthood; therefore, this criterion would preclude the development of early ADHD prediction models. Even in cases where such a criterion is feasible, it can significantly reduce the sample size available for learning and introduce systematic biases [<xref ref-type="bibr" rid="ref3">3</xref>], as it tends to exclude subpopulations with shorter follow-up, including disadvantaged groups.</p><p>Time-to-event (TTE; ie, survival analysis) methods are the natural alternative, as they are designed for right-censored outcomes. Various versions of classification trees and random forests [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>], Bayesian networks [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>], Cox proportional hazards regression [<xref ref-type="bibr" rid="ref8">8</xref>], and neural networks [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>] have been applied to survival data with mixed success, and have been adapted to the EHR setting [<xref ref-type="bibr" rid="ref11">11</xref>]. Deep learning [<xref ref-type="bibr" rid="ref12">12</xref>] models such as DeepSurv [<xref ref-type="bibr" rid="ref13">13</xref>] or deep Cox proportional hazards (DCPHs), which follow the Cox proportional hazards framework but use a neural network to predict the log-hazard ratio, have become popular for EHR prediction tasks. Neural network-based TTE approaches are advantageous because they can efficiently process large, unstructured, high-dimensional inputs and capture complex nonlinear relationships between features and outcomes.</p><p>However, common TTE approaches also have limitations relevant to long-horizon diagnosis prediction. 
Unlike in survival analysis, the event of interest never occurs in most patients, and typically we are more concerned with predicting diagnosis probability than predicting diagnosis timing. Consequently, approaches that predict the probability of diagnosis separately from its timing [<xref ref-type="bibr" rid="ref14">14</xref>] are well-suited for long-horizon diagnosis prediction, whereas DCPH and other approaches that assume relative likelihood does not change over time are less appropriate. These considerations motivate our current work to use a discrete-time neural network (DTNN), which combines the benefits of BC and TTE approaches.</p><p>First, the DTNN offers significant flexibility. Specifically, it does not assume a particular parametric form for the event time density, and in particular, allows the effect of covariates on risk to vary across the time horizon. Second, the DTNN predicts the probability of no-event within the time horizon, which is useful in diagnosis prediction where the event of interest may often not occur. For these reasons, we have found DTNN to be advantageous in our work.</p><p>In this paper, we examine the advantages of the DTNN approach compared to BC and DCPH across 4 long-horizon, EHR-based event prediction tasks. We hypothesize that the DTNN approach will yield higher discrimination performance and more accurate likelihood predictions compared to BC even after common filtering strategies are applied due to the inability of BC to disentangle the probability of diagnosis from that of insufficient follow-up. We further hypothesize that DTNN performance will be higher than DCPH, and DTNN predictions will better reflect real-world clinical prevalence and patterns. 
The code for our work is available online [<xref ref-type="bibr" rid="ref15">15</xref>].</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Ethical Considerations</title><p>All study procedures were approved by the Duke Health Institutional Review Board (Pro00111224) and comply with institutional policies and federal regulations. A waiver of participant consent was approved due to the minimal risk posed by study procedures and the infeasibility of obtaining consent in a large retrospective cohort. No compensation was provided to the participants. Identifiers were omitted during analysis, which was executed within the Duke PACE (Protected Analytics Computing Environment), a highly secure virtual network space designed for protected health information.</p></sec><sec id="s2-2"><title>Cohort Identification</title><p>Analyses were based on inpatient and outpatient encounters within the Duke University Health System (DUHS), a large academic medical center based in Durham, NC. DUHS provides care to approximately 85% of children in Durham and surrounding Durham County, which has a diverse population with varying demographic and socioeconomic status [<xref ref-type="bibr" rid="ref16">16</xref>]. Records were extracted from the current (2014&#x2010;2023) DUHS EHR, which is based on the platform developed by Epic.</p><p>Study inclusion criteria were the following: (1) date of birth between January 1, 2014 and October 29, 2022; and (2)&#x2009;&#x2265;1 visit within the DUHS before 30 days of age. DUHS encounters between January 1, 2014 and June 2, 2023 were extracted for individuals meeting these criteria. 
See Figure S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> for the distribution of year of birth for this identified cohort.</p></sec><sec id="s2-3"><title>Diagnosis Identification</title><p>We focused on 4 clinical diagnoses: autism spectrum disorder (autism), ADHD, recurrent otitis media (ROM), and food allergy (FA). We used computable phenotypes previously established within DUHS [<xref ref-type="bibr" rid="ref17">17</xref>] or formulated in consultation with clinicians. The classification criteria are provided in Tables S1 and S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p></sec><sec id="s2-4"><title>Experimental Setup</title><p>BC models predicting observed diagnoses are significantly influenced by adequate follow-up probabilities, requiring meticulous filtering strategies. We first conducted baseline experiments to establish the performance of BC models with and without exclusion criteria based on year-of-birth (YOB) or follow-up length. Correspondingly, we have 3 models trained on different cohort subsets, which are denoted as BC<sub>YOB&#x2264;2020</sub>, BC<sub>YOB&#x2264;2018</sub>, and BC<sub>t&#x2265;5</sub> (where <italic>t</italic> denotes follow-up length). The upper YOB limit of the dataset for the prediction tasks was capped at 2020 due to the rarity of autism and ADHD diagnoses before the age of 2 years (<xref ref-type="fig" rid="figure1">Figure 1</xref>). For subset YOB&#x2264;2018, we excluded all children who were younger than 5 years of age at the end of our observation window to limit effects of early censoring on model predictions. For subset <italic>t</italic>&#x2265;5, we excluded all children with &#x003C;5 years of follow-up as a more aggressive measure; note that this subset overlaps the subset YOB&#x2264;2018. Next, we introduced 2 TTE models, namely DTNN<sub>YOB&#x2264;2020</sub> and DCPH<sub>YOB&#x2264;2020</sub>, and evaluated their performance against the 3 BC approaches. 
To summarize, we explored the effect of each setup when training the corresponding model to predict each of the 4 conditions, yielding 20 models in total.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Distribution of observed diagnosis ages in years (upper panel) and months (lower panel). Children with diagnoses before respective diagnosis age cutoffs (marked by the red line) were excluded. Note that there were 2 ADHD diagnoses before the age cutoff of 3 years. ADHD: attention-deficit/hyperactivity disorder; FA: food allergy; ROM: recurrent otitis media.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v4i1e62985_fig01.png"/></fig><p>Our features were based on encounters taking place before the following predefined, condition-specific prediction ages: 15 months, 3 years, 4 months, and 3 months for autism, ADHD, ROM, and FA, respectively (<xref ref-type="fig" rid="figure1">Figure 1</xref>). These ages were chosen to be clinically useful prediction times that were earlier than most observed diagnoses. Individuals diagnosed or censored before these cutoffs were excluded from the analysis. To prevent temporal data leakage, the events used for prediction were limited to those taking place before the first diagnosis code (<italic>ICD-10</italic> [<italic>International Classification of Diseases, Tenth Revision</italic>]) associated with the outcome of interest. The distribution of censoring ages can be found in Figure S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p><p>The use of predefined diagnosis age cutoffs was a deliberate design decision. First, we aimed to demonstrate the predictive value of detection models based solely on EHR data collected from early ages [<xref ref-type="bibr" rid="ref17">17</xref>]. 
Second, using fixed age cutoffs standardizes the data collection period for all individuals, which simplifies analysis and ensures consistency across the dataset. This approach allows us to focus on understanding model performance across various clinical conditions without the additional complexity of time-dependent updates.</p><p>For each diagnosis, the dataset was partitioned randomly, allocating 60% for training, 20% for validation, and 20% for testing.</p></sec><sec id="s2-5"><title>Model Development</title><sec id="s2-5-1"><title>Overview</title><p>Each observation was represented by the triplet <inline-formula><mml:math id="ieqn1"><mml:mo>{</mml:mo><mml:mi>X</mml:mi><mml:mo>,</mml:mo><mml:mi>T</mml:mi><mml:mo>,</mml:mo><mml:mi>S</mml:mi><mml:mo>}</mml:mo></mml:math></inline-formula>, where <inline-formula><mml:math id="ieqn2"><mml:mi>X</mml:mi><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> is a <italic>d</italic>-dimensional feature vector, <inline-formula><mml:math id="ieqn3"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi>T</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:msub><mml:mi>E</mml:mi><mml:mrow><mml:mi>m</mml:mi><mml:mi>a</mml:mi><mml:mi>x</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">]</mml:mo></mml:mrow></mml:mstyle></mml:math></inline-formula> is an observed event or censoring time over a finite time horizon, and <inline-formula><mml:math id="ieqn4"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi>S</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:mstyle></mml:math></inline-formula> indicates whether <italic>T</italic> is a right-censoring time (<italic>S=0</italic>) or an event time (<italic>S=1</italic>). 
The observed time <italic>T</italic> is the minimum of the event time <italic>E</italic> and the right-censoring time <italic>C</italic>, that is, <inline-formula><mml:math id="ieqn5"><mml:mi>T</mml:mi><mml:mo>=</mml:mo><mml:mi>m</mml:mi><mml:mi>i</mml:mi><mml:mi>n</mml:mi><mml:mo>(</mml:mo><mml:mi>E</mml:mi><mml:mo>,</mml:mo><mml:mi> </mml:mi><mml:mi>C</mml:mi><mml:mo>)</mml:mo></mml:math></inline-formula>.</p><p>The model selection process began with experimenting with different combinations of fully connected layers and transformer architectures. See <xref ref-type="fig" rid="figure2">Figure 2</xref> for the final model architectures.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Model architectures of DTNN, DCPH, and BC. BC: binary classification; DCPH: deep Cox proportional hazard; DTNN: discrete-time neural network; FC: fully connected; MLP: multilayer perceptron; ReLU: rectified linear unit.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v4i1e62985_fig02.png"/></fig></sec><sec id="s2-5-2"><title>Pretraining Medical Concept Embeddings</title><p>Patient histories were represented as timestamped sequences of DUHS EHR events, including <italic>ICD-10</italic> diagnosis codes, medications (RxNorm [<xref ref-type="bibr" rid="ref18">18</xref>] codes), procedures (Current Procedural Terminology [<xref ref-type="bibr" rid="ref19">19</xref>] codes), and laboratories (Logical Observation Identifiers Names and Codes [<xref ref-type="bibr" rid="ref20">20</xref>] codes). Events were mapped to corresponding Word2Vec embeddings, which were learned by training the model on these event sequences to capture contextual relationships between codes. The model used a Continuous Bag of Words approach with negative sampling, producing embeddings of size 256. Padding and out-of-vocabulary indices were also included and mapped to a vector of zeroes. 
Table S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> details the hyperparameters used during the training process.</p></sec><sec id="s2-5-3"><title>Encoder Architecture</title><p>The BC and TTE models all shared a common underlying encoder architecture comprised of (1) an embedding layer, (2) a fully connected layer with rectified linear unit activation applied in parallel to each embedding, (3) a global mean pooling layer, and (4) a fully connected layer with rectified linear unit activation. The embedding layer was initialized with frozen pretrained weights from the Word2Vec model. The sequence length was fixed at 512. Shorter sequences were padded, while longer sequences were truncated by selecting the most recent events preceding the age cutoff for a given model. The mean pooling layer was applied across the sequence dimension, resulting in a single fixed-length vector with dimension equal to that of the embeddings.</p></sec><sec id="s2-5-4"><title>Prediction Head</title><p>In DTNN, the prediction head was a single fully connected hidden layer with Softmax activation, producing a probability distribution across multiple bins. The bin boundaries can be found in Table S4 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. 
Under the common assumption of noninformative right-censoring, we may ignore the censoring density and optimize the likelihood <inline-formula><mml:math id="ieqn6"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi>P</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>t</mml:mi><mml:mo>,</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:mi>s</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mi>x</mml:mi><mml:mo>;</mml:mo><mml:mi>&#x03B8;</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:mstyle></mml:math></inline-formula> over the observed data <inline-formula><mml:math id="ieqn7"><mml:mi>D</mml:mi><mml:mo>=</mml:mo><mml:msubsup><mml:mrow><mml:mo>{</mml:mo><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mi> </mml:mi><mml:msub><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>}</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>N</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula> by minimizing the following loss:</p><disp-formula id="equWL1"><mml:math id="eqn1"><mml:mstyle displaystyle="true" 
scriptlevel="0"><mml:mrow><mml:msub><mml:mi>L</mml:mi><mml:mrow><mml:mi>M</mml:mi><mml:mi>L</mml:mi><mml:mi>E</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mi>&#x03B8;</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mo>&#x2212;</mml:mo><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>N</mml:mi></mml:mrow></mml:munderover><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>s</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mi>log</mml:mi><mml:mo>&#x2061;</mml:mo><mml:msub><mml:mi>p</mml:mi><mml:mrow><mml:mrow><mml:mi>&#x03B8;</mml:mi></mml:mrow></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>t</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>+</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>s</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mi>log</mml:mi><mml:mo>&#x2061;</mml:mo><mml:msub><mml:mi>P</mml:mi><mml:mrow><mml:mi>&#x03B8;</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>t</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <inline-formula><mml:math id="ieqn8"><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x03B8;</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> is the survival function associated with <inline-formula><mml:math id="ieqn9"><mml:msub><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x03B8;</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> and <italic>T</italic> has been discretized such that 
each <inline-formula><mml:math id="ieqn10"><mml:msub><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> indicates which interval contains <inline-formula><mml:math id="ieqn11"><mml:mi>m</mml:mi><mml:mi>i</mml:mi><mml:mi>n</mml:mi><mml:mo>(</mml:mo><mml:mi>E</mml:mi><mml:mo>,</mml:mo><mml:mi> </mml:mi><mml:mi>C</mml:mi><mml:mo>)</mml:mo></mml:math></inline-formula>.</p><p>In BC and DCPH, the prediction head was a fully connected hidden layer predicting the log-odds and log-hazard ratio, respectively, with corresponding binary cross-entropy or Cox negative partial log-likelihood [<xref ref-type="bibr" rid="ref21">21</xref>] loss. Whereas BC directly predicts the probability that diagnosis will be observed (by applying the logistic function to the predicted log-odds), with DCPH this probability may be derived from the predicted log-hazard ratio and baseline hazard function. Note that for BC, we assumed a constant predicted probability irrespective of the time point.</p></sec><sec id="s2-5-5"><title>Hyperparameter Tuning</title><p>The hyperparameters, consisting of learning rate and weight decay, were then chosen through a grid search to minimize loss on the validation set (Table S5 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). 
These optimized models were subsequently used for evaluation on the test set.</p></sec></sec><sec id="s2-6"><title>Model Evaluation</title><sec id="s2-6-1"><title>Calibration Curves</title><p>The BC models were evaluated using the probability calibration module from the <italic>scikit-learn</italic> library [<xref ref-type="bibr" rid="ref22">22</xref>], while the TTE models were evaluated by comparing the observed probabilities (ie, estimated survival probabilities of the Kaplan-Meier estimator) and the predicted probabilities at selected time intervals [<xref ref-type="bibr" rid="ref23">23</xref>].</p></sec><sec id="s2-6-2"><title>Performance Metrics</title><p>Our primary evaluation metrics were the time-varying area under the receiver operating characteristic curve (AUC<sub>t</sub>) and time-varying average precision (AP<sub>t</sub>) [<xref ref-type="bibr" rid="ref24">24</xref>], which quantify the model&#x2019;s ability to discriminate between individuals diagnosed before age <italic>t</italic> (positives; <italic>S</italic>=1, <italic>T</italic>&#x2264;<italic>t</italic>) and individuals remaining event-free beyond age <italic>t</italic> (negatives; <italic>T</italic>&#x003E;<italic>t</italic>). This time-dependent approach is necessary due to censoring, which prevents many diagnoses from being observed. In contrast, the standard area under the receiver operating characteristic curve (AUC) and average precision (AP) do not differentiate between nondiagnosed individuals with short versus long follow-up, making them unsuitable for evaluating predicted diagnosis probabilities.</p><p>The Harrell concordance index [<xref ref-type="bibr" rid="ref25">25</xref>] was also used to quantify the agreement between likelihood predictions and event times. 
This metric quantifies the model&#x2019;s ability to discriminate between individuals diagnosed earlier and those diagnosed later or not at all.</p><p>For each metric, we computed the 95% CI of the distribution over performance obtained from 100 bootstrap samples in the test set.</p><p>As we were unable to directly assess the accuracy of the predicted probabilities because diagnoses were not fully observed in the dataset, we instead contextualized them and reasoned about their correctness by analyzing the corresponding published trends.</p></sec></sec><sec id="s2-7"><title>Subgroup Analysis</title><p>To explore possible differential effects of each model setup on specific demographics, we analyzed model predictions and performance in subgroups defined by YOB, follow-up length (ie, age at censoring), sex, race, and insurance. Biological sex was classified as male or female. Race was categorized into the following groups: Asian, Black or African American, White, unavailable, and other. Insurance status was separated into public, private, and other categories.</p><p>To assess the performance of our models on out-of-distribution (OOD) data, we extended the evaluation to include children born after 2018 and individuals with a follow-up duration of &#x003C;5 years for the YOB and follow-up length plots, respectively. For the YOB plots, 2019 and 2020 were designated as OOD years for BC<sub>YOB&#x2264;2018</sub>. Since BC<sub>t&#x2265;5</sub> also fulfilled the YOB&#x2264;2018 criteria, the same years were, by extension, considered OOD. Similarly, for the follow-up length plots, individuals with a follow-up duration of &#x2265;5 years were categorized as in-distribution, while those with &#x003C;5 years were classified as OOD.</p></sec><sec id="s2-8"><title>Semisynthetic ROM Dataset</title><p>To further explore the effect of early censoring on each method&#x2019;s ability to predict diagnosis probability, we simulated early censoring for ROM cases. 
Unlike ADHD, most ROM diagnoses were observed rather than censored due to the earlier age of diagnosis. Leveraging prior knowledge of true ROM labels, we introduced artificial censoring by scaling the true censoring distribution such that the maximum age is at 1.2 years to mimic the ADHD scenario. Generating a semisynthetic ROM dataset served 2 purposes: reproducing earlier findings on BC limitations with censored data and demonstrating DTNN model performance under such conditions. Additional DTNN and BC models were trained on this semisynthetic train dataset and subsequently evaluated on the original test dataset.</p><p>This study follows the Consolidated Reporting of Machine Learning Studies guidelines (<xref ref-type="supplementary-material" rid="app2">Checklist 1</xref>) [<xref ref-type="bibr" rid="ref26">26</xref>].</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Patient Characteristics</title><p>Records for 57,701 unique patients meeting study criteria were initially extracted. After excluding children born after 2020, the evaluation dataset comprised 43,536 patients (<xref ref-type="table" rid="table1">Table 1</xref>). Based on the respective diagnosis age cutoffs (<xref ref-type="fig" rid="figure1">Figure 1</xref>), we further excluded 1 individual with autism as an outlier due to a diagnosis within the first month of birth, along with 2 individuals with ADHD, 25 individuals with ROM, and 70 with FA. 
Additionally, individuals with censoring ages preceding the age cutoffs were excluded: 9332 from the autism dataset, 17,691 from the ADHD dataset, 6171 from the ROM dataset, and 5847 from the FA dataset.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Patient demographics.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" colspan="2">Variable and category or value</td><td align="left" valign="bottom">All</td><td align="left" valign="bottom">Autism</td><td align="left" valign="bottom">ADHD<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup></td><td align="left" valign="bottom">ROM<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup></td><td align="left" valign="bottom">FA<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top" colspan="2">Total, n (%)</td><td align="left" valign="top">43,536 (100)</td><td align="left" valign="top">749 (1.7)</td><td align="left" valign="top">618 (1.4)</td><td align="left" valign="top">5201 (11.9)</td><td align="left" valign="top">916 (2.1)</td></tr><tr><td align="left" valign="top" colspan="7">Sex</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Male, n (%)</td><td align="left" valign="top">22,583 (51.9)</td><td align="left" valign="top">590 (78.8)</td><td align="left" valign="top">432 (69.9)</td><td align="left" valign="top">2951 (56.7)</td><td align="left" valign="top">544 (59.4)</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Female, n (%)</td><td align="left" valign="top">20,953 (48.1)</td><td align="left" valign="top">159 (21.2)</td><td align="left" valign="top">186 (30.1)</td><td align="left" valign="top">2250 (43.3)</td><td align="left" valign="top">372 (40.6)</td></tr><tr><td 
align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Chi-square (<italic>df</italic>)</td><td align="left" valign="top">N/A<sup><xref ref-type="table-fn" rid="table1fn4">d</xref></sup></td><td align="left" valign="top">221.9 (1)</td><td align="left" valign="top">79.7 (1)</td><td align="left" valign="top">58.8 (1)</td><td align="left" valign="top">21.5 (1)</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><italic>P</italic> value</td><td align="left" valign="top">N/A</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top" colspan="7">Race, n (%)</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Asian</td><td align="left" valign="top">1835 (4.2)</td><td align="left" valign="top">23 (3.1)</td><td align="left" valign="top">8 (1.3)</td><td align="left" valign="top">145 (2.8)</td><td align="left" valign="top">63 (6.9)</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Black or African American</td><td align="left" valign="top">13,132 (30.2)</td><td align="left" valign="top">272 (36.3)</td><td align="left" valign="top">206 (33.3)</td><td align="left" valign="top">1226 (23.6)</td><td align="left" valign="top">278 (30.3)</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>White</td><td align="left" valign="top">18,681 (42.9)</td><td align="left" valign="top">266 (35.5)</td><td align="left" valign="top">326 (52.8)</td><td align="left" valign="top">2936 (56.5)</td><td align="left" 
valign="top">418 (45.6)</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Unavailable</td><td align="left" valign="top">3874 (8.9)</td><td align="left" valign="top">57 (7.6)</td><td align="left" valign="top">29 (4.7)</td><td align="left" valign="top">390 (7.5)</td><td align="left" valign="top">45 (4.9)</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Other</td><td align="left" valign="top">6014 (13.8)</td><td align="left" valign="top">131 (17.5)</td><td align="left" valign="top">49 (7.9)</td><td align="left" valign="top">504 (9.7)</td><td align="left" valign="top">112 (12.2)</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Chi-square (<italic>df</italic>)</td><td align="left" valign="top">N/A</td><td align="left" valign="top">22.1 (4)</td><td align="left" valign="top">55 (4)</td><td align="left" valign="top">521.9 (4)</td><td align="left" valign="top">44.7 (4)</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><italic>P</italic> value</td><td align="left" valign="top">N/A</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top" colspan="7">Insurance, n (%)</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Public</td><td align="left" valign="top">23,262 (53.4)</td><td align="left" valign="top">431 (57.5)</td><td align="left" valign="top">326 (52.8)</td><td align="left" valign="top">2011 (38.7)</td><td align="left" valign="top">319 
(34.8)</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Private</td><td align="left" valign="top">20,127 (46.2)</td><td align="left" valign="top">316 (42.2)</td><td align="left" valign="top">288 (46.6)</td><td align="left" valign="top">3178 (61.1)</td><td align="left" valign="top">596 (65.1)</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Other</td><td align="left" valign="top">147 (0.3)</td><td align="left" valign="top">2 (0.3)</td><td align="left" valign="top">4 (0.6)</td><td align="left" valign="top">12 (0.2)</td><td align="left" valign="top">1 (0.1)</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Chi-square (<italic>df</italic>)</td><td align="left" valign="top">N/A</td><td align="left" valign="top">4.3 (2)</td><td align="left" valign="top">0.7 (2)</td><td align="left" valign="top">571.1 (2)</td><td align="left" valign="top">141.7 (2)</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><italic>P</italic> value</td><td align="left" valign="top">N/A</td><td align="left" valign="top">.12</td><td align="left" valign="top">.69</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top">&#x003C;.001</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>ADHD: attention-deficit/hyperactivity disorder.</p></fn><fn id="table1fn2"><p><sup>b</sup>ROM: recurrent otitis media.</p></fn><fn id="table1fn3"><p><sup>c</sup>FA: food allergy.</p></fn><fn id="table1fn4"><p><sup>d</sup>N/A: not applicable.</p></fn></table-wrap-foot></table-wrap><p>Male-to-female ratios were 3.7 for autism, 2.3 for ADHD, 1.3 for ROM, and 1.5 for FA. 
All diagnoses were associated with sex (<italic>P</italic>&#x003C;.001) and racial status (<italic>P</italic>&#x003C;.001). ROM and FA were associated with insurance status (<italic>P</italic>&#x003C;.001), but autism and ADHD were not (<italic>P</italic>=.12 and <italic>P</italic>=.69, respectively). Private insurance rates were 3178/5201 (61.1%) and 596/916 (65.1%) in the ROM and FA groups, respectively, compared to 316/749 (42.2%) and 288/618 (46.6%) in the autism and ADHD groups, respectively.</p><p>The mean age at diagnosis for autism and ADHD was 3.75 years and 6.22 years, respectively, higher than that for ROM and FA, which were 1.57 years and 2.01 years, respectively (<xref ref-type="fig" rid="figure1">Figure 1</xref>).</p></sec><sec id="s3-2"><title>Analysis of Performance Metrics</title><p>In general, the TTE models consistently matched or outperformed BC models with higher AUC<sub>t</sub> values across all conditions (<xref ref-type="fig" rid="figure3">Figure 3</xref> and Table S6 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). At clinically relevant operating time points, the AUC<sub>t</sub> values for DTNN<sub>YOB&#x2264;2020</sub> and DCPH<sub>YOB&#x2264;2020</sub> were 0.70 (95% CI 0.66&#x2010;0.77) and 0.72 (95% CI 0.66&#x2010;0.78) at t=5 for autism, 0.72 (95% CI 0.65&#x2010;0.76) and 0.68 (95% CI 0.62&#x2010;0.74) at t=7 for ADHD, 0.72 (95% CI 0.70&#x2010;0.75) and 0.71 (95% CI 0.69&#x2010;0.74) at t=1 for ROM, and 0.74 (95% CI 0.68&#x2010;0.82) and 0.71 (95% CI 0.63&#x2010;0.77) at t=1 for FA, compared to 0.60 (95% CI 0.55&#x2010;0.66), 0.47 (95% CI 0.40&#x2010;0.54), 0.73 (95% CI 0.70&#x2010;0.75), and 0.77 (95% CI 0.71&#x2010;0.82) for BC<sub>YOB&#x2264;2020</sub>, respectively.</p><p>Conversely, the regular AUC values for BC<sub>YOB&#x2264;2020</sub> were consistently higher than those for DTNN<sub>YOB&#x2264;2020</sub> and DCPH<sub>YOB&#x2264;2020</sub>. 
Notably, a statistically significant difference (<italic>P</italic>&#x003C;.05) was observed in the ADHD prediction task (<inline-formula><mml:math id="ieqn12"><mml:msubsup><mml:mrow><mml:mtext>BC</mml:mtext></mml:mrow><mml:mrow><mml:mtext>YOB&#x2264;2020</mml:mtext></mml:mrow><mml:mrow><mml:mtext>ADHD</mml:mtext></mml:mrow></mml:msubsup></mml:math></inline-formula>: AUC 0.75, 95% CI 0.71&#x2010;0.80; <inline-formula><mml:math id="ieqn13"><mml:msubsup><mml:mrow><mml:mtext>DTNN</mml:mtext></mml:mrow><mml:mrow><mml:mtext>YOB&#x2264;2020</mml:mtext></mml:mrow><mml:mrow><mml:mtext>ADHD</mml:mtext></mml:mrow></mml:msubsup></mml:math></inline-formula>: AUC 0.64, 95% CI 0.59&#x2010;0.69; <inline-formula><mml:math id="ieqn14"><mml:msubsup><mml:mrow><mml:mtext>DCPH</mml:mtext></mml:mrow><mml:mrow><mml:mtext>YOB&#x2264;2020</mml:mtext></mml:mrow><mml:mrow><mml:mtext>ADHD</mml:mtext></mml:mrow></mml:msubsup></mml:math></inline-formula>: AUC 0.64, 95% CI 0.60&#x2010;0.69). With filtering, BC<sub>YOB&#x2264;2018</sub> and BC<sub>t&#x2265;5</sub> exhibited decreased regular AUC, with the latter experiencing a larger decline.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Comparison of AUC<sub>t</sub> (solid lines) and regular AUC (bar graphs). 
ADHD: attention-deficit/hyperactivity disorder; AUC: area under the receiver operating characteristic curve; AUC<sub>t</sub>: time-varying area under the receiver operating characteristic curve; BC: binary classification; DCPH: deep Cox proportional hazard; DTNN: discrete-time neural network; FA: food allergy; ROM: recurrent otitis media; t: t denotes follow-up length; YOB: year-of-birth.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v4i1e62985_fig03.png"/></fig><p>The regular AP and AP<sub>t</sub> exhibited similar trends as described above, with higher AP<sub>t</sub> but lower regular AP for TTE models (Figure S3 and Table S7 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). However, direct comparison and interpretation are difficult due to the variation in test prevalence across different datasets. The concordance index, comparing ordered predicted event probabilities with observed event times, further demonstrates that the TTE models consistently performed as well as or better than the BC models (Table S8 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). In particular, DTNN<sub>YOB&#x2264;2020</sub> and DCPH<sub>YOB&#x2264;2020</sub> achieved 0.656 and 0.667 for autism, 0.682 and 0.657 for ADHD, as compared to 0.629 and 0.558 for BC<sub>YOB&#x2264;2020</sub>, respectively.</p><p>The predicted probabilities for all models closely align with the observed estimates for in-distribution years, demonstrating overall good calibration, while OOD curves (ie, years 2019 and 2020) for BC<sub>YOB&#x2264;2018</sub> and BC<sub>t&#x2265;5</sub> show poor calibration (<xref ref-type="fig" rid="figure4">Figure 4</xref>).</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Calibration analysis. 
The predicted probabilities were compared with observed event rates across different probability bins, using Kaplan-Meier estimates for the TTE models and true binary outcomes for the BC models. OOD curves (ie, years 2019 and 2020) were also added for BC<sub>YOB&#x2264;2018</sub> and BC<sub>t&#x2265;5</sub>. ADHD: attention-deficit/hyperactivity disorder; BC: binary classification; DCPH: deep Cox proportional hazard; DTNN: discrete-time neural network; FA: food allergy; OOD: out-of-distribution; ROM: recurrent otitis media; t: t denotes follow-up length; TTE: time-to-event.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v4i1e62985_fig04.png"/></fig></sec><sec id="s3-3"><title>Semisynthetic Censoring Experiment Results</title><p>The <inline-formula><mml:math id="ieqn15"><mml:msubsup><mml:mrow><mml:mtext>DTNN</mml:mtext></mml:mrow><mml:mrow><mml:mtext>YOB&#x2264;2020</mml:mtext></mml:mrow><mml:mrow><mml:mtext>ROM, ss</mml:mtext></mml:mrow></mml:msubsup></mml:math></inline-formula> performance remained comparable to <inline-formula><mml:math id="ieqn16"><mml:msubsup><mml:mrow><mml:mtext>DTNN</mml:mtext></mml:mrow><mml:mrow><mml:mtext>YOB&#x2264;2020</mml:mtext></mml:mrow><mml:mrow><mml:mtext>ROM</mml:mtext></mml:mrow></mml:msubsup></mml:math></inline-formula> and <inline-formula><mml:math id="ieqn17"><mml:msubsup><mml:mrow><mml:mtext>BC</mml:mtext></mml:mrow><mml:mrow><mml:mtext>YOB&#x2264;2020</mml:mtext></mml:mrow><mml:mrow><mml:mtext>ROM</mml:mtext></mml:mrow></mml:msubsup></mml:math></inline-formula>, exhibiting good calibration, AUC<sub>t</sub>, and regular AUC values. 
However, <inline-formula><mml:math id="ieqn18"><mml:msubsup><mml:mrow><mml:mtext>BC</mml:mtext></mml:mrow><mml:mrow><mml:mtext>YOB&#x2264;2020</mml:mtext></mml:mrow><mml:mrow><mml:mtext>ROM, ss</mml:mtext></mml:mrow></mml:msubsup></mml:math></inline-formula> displayed worse calibration due to underprediction, and had lower AUC<sub>t</sub> and regular AUC values (<xref ref-type="fig" rid="figure5">Figure 5</xref>). Note that comparing performances beyond 1.2 years would be unfair, as those observed times were not available for model learning during training in the semisynthetic setup.</p><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>Comparison of performance metrics evaluated on the original test set between BC and DTNN models trained on original and semisynthetic ROM train datasets. AUC: area under the receiver operating characteristic curve; AUC<sub>t</sub>: time-varying area under the receiver operating characteristic curve; BC: binary classification; DTNN: discrete-time neural network; ROM: recurrent otitis media; YOB: year-of-birth.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v4i1e62985_fig05.png"/></fig></sec><sec id="s3-4"><title>Subgroup Analyses</title><p>Probabilities predicted by BC<sub>YOB&#x2264;2020</sub> decreased over time across all conditions. This trend was less pronounced for BC<sub>YOB&#x2264;2018</sub> and BC<sub>t&#x2265;5</sub> (<xref ref-type="fig" rid="figure6">Figure 6</xref>). In contrast, the probabilities predicted by DTNN<sub>YOB&#x2264;2020</sub> for autism and ADHD showed a consistent yearly increase. For ROM, predicted probabilities declined from 2014 to 2017, then increased from 2018 onward. For FA, predicted probabilities modestly increased from 2014 to 2015, then stabilized at approximately 3.4%&#x2010;3.5% in subsequent years. 
The results for DCPH<sub>YOB&#x2264;2020</sub> were heterogeneous.</p><fig position="float" id="figure6"><label>Figure 6.</label><caption><p>Grouped analysis of predicted probability distributions by year-of-birth. ADHD: attention-deficit/hyperactivity disorder; BC: binary classification; DCPH: deep Cox proportional hazard; DTNN: discrete-time neural network; FA: food allergy; ROM: recurrent otitis media; t: t denotes follow-up length; YOB: year-of-birth.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v4i1e62985_fig06.png"/></fig><p>We expanded our YOB subgroup analysis to include 2019 and 2020 to evaluate BC model behaviors during these OOD years (<xref ref-type="fig" rid="figure6">Figure 6</xref>). BC<sub>t&#x2265;5</sub> exhibited a modest decrease in predicted probabilities across all the conditions, more pronounced in 2020 than in 2019, while BC<sub>YOB&#x2264;2018</sub> remained relatively stable.</p><p>There was a positive correlation observed between the predicted probability and follow-up length in all BC models, albeit to a lesser extent in BC<sub>YOB&#x2264;2018</sub> and BC<sub>t&#x2265;5</sub> (<xref ref-type="fig" rid="figure7">Figure 7</xref>). A similar trend was apparent in the analysis of the concordance between predicted nonevent probabilities and the observed censoring times (<xref ref-type="table" rid="table2">Table 2</xref>), with <inline-formula><mml:math id="ieqn19"><mml:msubsup><mml:mrow><mml:mtext>BC</mml:mtext></mml:mrow><mml:mrow><mml:mtext>YOB&#x2264;2020</mml:mtext></mml:mrow><mml:mrow><mml:mtext>ADHD</mml:mtext></mml:mrow></mml:msubsup></mml:math></inline-formula> showing the highest concordance index of 0.734. 
BC predictions appeared to align with the test prevalence (Figures S7-S9 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>), whereas DTNN and DCPH predictions did not (Figures S5 and S6 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p><fig position="float" id="figure7"><label>Figure 7.</label><caption><p>Grouped analysis of predicted probability distributions by follow-up length in years. ADHD: attention-deficit/hyperactivity disorder; BC: binary classification; DCPH: deep Cox proportional hazard; DTNN: discrete-time neural network; FA: food allergy; ROM: recurrent otitis media; t: t denotes follow-up length; YOB: year-of-birth.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v4i1e62985_fig07.png"/></fig><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Concordance index by comparing ordered predicted nonevent probabilities of BC<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup> models with observed censoring times.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom"/><td align="left" valign="bottom">Autism</td><td align="left" valign="bottom">ADHD<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup></td><td align="left" valign="bottom">ROM<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup></td><td align="left" valign="bottom">FA<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top">BC<sub>YOB&#x2264;2020<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></sub></td><td align="left" valign="top">0.581</td><td align="left" valign="top">0.734</td><td align="left" valign="top">0.605</td><td align="left" valign="top">0.558</td></tr><tr><td align="left" valign="top">BC<sub>YOB&#x2264;2018</sub></td><td align="left" valign="top">0.533</td><td align="left" 
valign="top">0.625</td><td align="left" valign="top">0.605</td><td align="left" valign="top">0.535</td></tr><tr><td align="left" valign="top">BC<sub>t&#x2265;5<sup><xref ref-type="table-fn" rid="table2fn6">f</xref></sup></sub></td><td align="left" valign="top">0.5</td><td align="left" valign="top">0.605</td><td align="left" valign="top">0.576</td><td align="left" valign="top">0.491</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>BC: binary classification.</p></fn><fn id="table2fn2"><p><sup>b</sup>ADHD: attention-deficit/hyperactivity disorder.</p></fn><fn id="table2fn3"><p><sup>c</sup>ROM: recurrent otitis media.</p></fn><fn id="table2fn4"><p><sup>d</sup>FA: food allergy.</p></fn><fn id="table2fn5"><p><sup>e</sup>YOB: year-of-birth.</p></fn><fn id="table2fn6"><p><sup>f</sup>t denotes follow-up length.</p></fn></table-wrap-foot></table-wrap><p>In all 4 conditions, DTNN predicted a greater likelihood of diagnosis for males. Among the racial groups, Asians had the highest predicted probability for autism and FA, while White individuals displayed the highest predicted probability for ADHD and ROM. Regarding insurance status, individuals with private insurance were more likely to be diagnosed with ROM and FA; however, findings for autism and ADHD were equivocal (<xref ref-type="fig" rid="figure8">Figure 8</xref>).</p><p>The individual results of the subgroup analysis by demographics for each model setup are available in Figures S10-S12 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p><fig position="float" id="figure8"><label>Figure 8.</label><caption><p>Demographics analysis of probability distributions by DTNN<sub>YOB&#x2264;2020</sub>. The subgroups are sex, race, and insurance status. 
ADHD: attention-deficit/hyperactivity disorder; DTNN: discrete-time neural network; FA: food allergy; ROM: recurrent otitis media; YOB: year-of-birth.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v4i1e62985_fig08.png"/></fig></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>Our study contributes to the understanding of how right-censoring influences model performance and predicted probabilities over time using EHR data. We highlight inherent limitations of BC in such contexts, even with filtering strategies. Furthermore, our results reinforce the potential of TTE approaches, particularly DTNN, in mitigating bias from the censoring distribution, leading to superior discrimination, calibration, and clinical prevalence prediction.</p></sec><sec id="s4-2"><title>Principal Results</title><p>First, we demonstrated that BC cannot disentangle the probability of diagnosis and early censoring, even with filtering. The BC models displayed poor AUC<sub>t</sub> performance, despite achieving high regular AUC scores (<xref ref-type="fig" rid="figure3">Figure 3</xref> and Table S6 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). This discrepancy arises because AUC<sub>t</sub> calculation excludes individuals censored before prediction time <italic>t</italic> whereas regular AUC calculation does not. Thus, the AUC is artificially inflated by &#x201C;correctly&#x201D; predicting diagnosed individuals in this subgroup of individuals who were censored early as negative cases. 
With filtering, BC<sub>YOB&#x2264;2018</sub> and BC<sub>t&#x2265;5</sub> benefitted less, resulting in lower regular AUC scores because more true cases with later diagnoses were excluded.</p><p>Spurious positive correlations between the predicted probability and follow-up length imply that BC models were unduly benefitting from early censoring (<xref ref-type="fig" rid="figure7">Figure 7</xref>), along with increased concordance between predicted nonevent probabilities and observed censoring times (<xref ref-type="table" rid="table2">Table 2</xref>). Similarly, these differences were less prominent in BC<sub>YOB&#x2264;2018</sub> and even less in BC<sub>t&#x2265;5</sub>, but not completely absent.</p><p>This contrast was exacerbated in long-horizon prediction tasks such as ADHD, with the degree of variation corresponding with the tail end of the diagnosis age distributions (<xref ref-type="fig" rid="figure1">Figure 1</xref>). ADHD showed the highest proportion of later diagnoses, followed by autism and FA, and the lowest in ROM. These results corroborate observations associating censoring with biased improved outcomes, where hazard ratios fall below 1 compared to complete follow-up and correlate inversely with the proportion of censored cases [<xref ref-type="bibr" rid="ref27">27</xref>].</p><p>Second, we found that TTE models outperformed BC models on all datasets. In diagnoses with longer time horizons, heavy right-censoring leads to many individuals having unknown status, while shorter prediction time horizons tend to have better follow-up. DTNN<sub>YOB&#x2264;2020</sub> and DCPH<sub>YOB&#x2264;2020</sub> achieved comparable or higher AUC<sub>t</sub> scores in predicting ROM and FA (<xref ref-type="fig" rid="figure3">Figure 3</xref> and Table S6 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>), suggesting that TTE models matched or surpassed BC models on datasets with less censoring. 
This superiority is particularly pronounced in autism and ADHD datasets, which experience heavier censoring. The main insight is that TTE models are well-suited to predict clinical outcomes, especially those with prolonged time horizons.</p><p>In our semisynthetic ROM censoring experiment, we reproduced the limitations of BC as evidenced by the deterioration in AUC<sub>t</sub> and regular AUC performance of <inline-formula><mml:math id="ieqn20"><mml:msubsup><mml:mrow><mml:mtext>BC</mml:mtext></mml:mrow><mml:mrow><mml:mtext>YOB&#x2264;2020</mml:mtext></mml:mrow><mml:mrow><mml:mtext>ROM, ss</mml:mtext></mml:mrow></mml:msubsup></mml:math></inline-formula> when evaluated on the original dataset (<xref ref-type="fig" rid="figure5">Figure 5</xref> and Table S6 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). This result supports our earlier claim that the BC models were underpredicting diagnosed individuals with early censoring. We also demonstrated that <inline-formula><mml:math id="ieqn21"><mml:msubsup><mml:mrow><mml:mtext>DTNN</mml:mtext></mml:mrow><mml:mrow><mml:mtext>YOB&#x2264;2020</mml:mtext></mml:mrow><mml:mrow><mml:mtext>ROM, ss</mml:mtext></mml:mrow></mml:msubsup></mml:math></inline-formula> remained well-calibrated and maintained comparable AUC<sub>t</sub> performance as <inline-formula><mml:math id="ieqn22"><mml:msubsup><mml:mrow><mml:mtext>DTNN</mml:mtext></mml:mrow><mml:mrow><mml:mtext>YOB&#x2264;2020</mml:mtext></mml:mrow><mml:mrow><mml:mtext>ROM</mml:mtext></mml:mrow></mml:msubsup></mml:math></inline-formula> (<xref ref-type="fig" rid="figure5">Figure 5</xref>), demonstrating the applicability of our TTE approach in situations with partially observed information.</p><p>We also examined the impact of BC filtering strategies on OOD years. Specifically, we extended the evaluation to include 2019 and 2020 (<xref ref-type="fig" rid="figure6">Figure 6</xref>). 
Notably, a discernible decline in predicted probabilities was observed for BC<sub>t&#x2265;5</sub> across all clinical conditions, with a slightly more pronounced drop in 2020 compared to 2019. In contrast, predicted probabilities by BC<sub>YOB&#x2264;2018</sub> remained relatively stable during the same OOD years. This suggests that the inclusion of older individuals (ie, born in or before 2018) with shorter follow-up (ie, &#x003C;5 years) makes predictions more stable on OOD years. However, including these individuals results in declining predicted probabilities due to early censoring on in-distribution years, as we have previously demonstrated. Moreover, BC<sub>YOB&#x2264;2018</sub> and BC<sub>t&#x2265;5</sub> showed poor calibration for all diagnoses on OOD years (<xref ref-type="fig" rid="figure4">Figure 4</xref>), rendering them unsuitable for clinical deployment.</p><p>Temporal and demographic trends were poorly represented in BC and DCPH. The probability of diagnosis should remain stable or increase over time due to improved awareness and tools unless specific interventions are implemented. However, BC<sub>YOB&#x2264;2020</sub> exhibited declining predicted probability for all diagnoses because the models assigned lower probability scores to individuals born later, despite the absence of temporal information during learning. Inadvertently, BC predictions follow test prevalence, which also contributes to its poor performance in the demographics subgroup analysis.</p><p>The unclear patterns in DCPH models likely result from a violation of the proportional hazards assumption, which is common in practice. For example, varying severity levels in autism and ADHD diagnoses can lead to nonproportionality, where low-likelihood groups initially exhibit delays in hazard before catching up with the high-likelihood groups [<xref ref-type="bibr" rid="ref28">28</xref>]. 
By assuming constant hazard ratios over time, DCPH models may not fully leverage the complexity of likelihood representations and time-dependent covariate impacts. While excelling in providing generalized representations at a population level (<xref ref-type="fig" rid="figure3">Figure 3</xref> and Figure S4 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>), our findings suggest inconsistent or inaccurate outcomes in subgroup analyses (<xref ref-type="fig" rid="figure6">Figures 6</xref> and <xref ref-type="fig" rid="figure7">7</xref>, and Figures S10-S12 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). DTNN, however, does not assume proportional hazards, enabling better capture of time-dependent covariate influences on survival.</p><p>In contrast to the BC and DCPH models, the diagnosis probabilities predicted by the DTNN models (<xref ref-type="fig" rid="figure6">Figures 6</xref> and <xref ref-type="fig" rid="figure8">8</xref>) are in keeping with actual prevalence, reflecting both temporal and demographic trends. For example, autism prevalence increased from 2.24% in 2014 to 2.79% in 2019 [<xref ref-type="bibr" rid="ref29">29</xref>], with higher rates among males and Black individuals [<xref ref-type="bibr" rid="ref30">30</xref>]. Our demographics analysis for ADHD also concurs with trends toward increased prevalence in males and White individuals [<xref ref-type="bibr" rid="ref31">31</xref>]. 
Note that the reported prevalence in DUHS may exceed nationwide estimates, given its status as a regional hub for neurodevelopmental diagnosis.</p><p>Interestingly, for ROM, our DTNN models appear consistent with distinctive temporal patterns including (1) declining prevalence from 2014 to 2017 associated with the availability of pneumococcal conjugate vaccines [<xref ref-type="bibr" rid="ref32">32</xref>] and (2) increasing prevalence from 2018 to 2020 amid the COVID-19 pandemic [<xref ref-type="bibr" rid="ref33">33</xref>]. The DTNN models also accurately predict increased likelihood associated with male sex, White race, lower socioeconomic status [<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref34">34</xref>], and private insurance, which reflect health care use disparities [<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref36">36</xref>].</p><p>Our models suggest stable FA prevalence (~3.4%&#x2010;3.5%), adding to mixed data that challenge whether rates have increased (range: 4.8%&#x2010;8%) [<xref ref-type="bibr" rid="ref37">37</xref>]. This discrepancy may arise due to difficulties in estimating true prevalence [<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref39">39</xref>] or our stricter diagnostic criteria (<italic>ICD-10</italic> code+IgE-based laboratory test) compared to other studies using surrogate laboratory tests or self-report, which tend to overestimate rates of clinical disease [<xref ref-type="bibr" rid="ref40">40</xref>-<xref ref-type="bibr" rid="ref42">42</xref>]. Demographically, our findings corroborate higher FA prevalence among males [<xref ref-type="bibr" rid="ref43">43</xref>] and Asian and non-Hispanic Black individuals compared to non-Hispanic White individuals [<xref ref-type="bibr" rid="ref44">44</xref>]. 
Additionally, our models corroborated the lower FA prevalence reported among children with public insurance [<xref ref-type="bibr" rid="ref45">45</xref>].</p><p>Our findings suggest that TTE models, particularly the DTNN, should be preferred in clinical settings dealing with right-censored outcomes. First, the DTNN models outperformed BC models, yielding clinically meaningful discriminatory performance with AUC<sub>t</sub>&#x2265;0.7 at early ages across all 4 clinical conditions, supporting earlier diagnoses and timely interventions. Second, the DTNN approach addresses label bias that may lead to underprediction, as evidenced by its superior discrimination, calibration, and ability to reflect clinical prevalence. While the modeling approach is arguably more challenging, it avoids the need for complex and often opaque filtering procedures.</p></sec><sec id="s4-3"><title>Limitations</title><p>Our study has important limitations. First, it is confined to data from DUHS only, which primarily serves a population with a high representation of Black and White individuals. This demographic makeup may limit the generalizability of the results to other health systems with different patient demographics. Second, computable phenotypes are imperfect, as the identification and timing of diagnosis can vary in practice. Third, not all information, including vital signs and laboratory values, was used during the training process. Fourth, we do not include every possible filtering strategy and competing model, which may limit the breadth of our findings. Fifth, sex bias may also influence diagnosis trends, with males being more likely to be diagnosed with autism in practice. To the extent that sex affects the distribution of event times, the discrete-time approach can help mitigate this bias, because it does not conflate diagnosis probability with timing, unlike BC and DCPH approaches. 
However, to the extent that sex also influences the probability of diagnosis at any given point, this is not a bias that we can overcome by choice of model alone and will require efforts to change assessment practices. Finally, the constrained size of our dataset prevents us from conducting finer subgroup analyses. For example, we could not explore temporal trends among different demographics, such as instances where autism rates among Black children surpassed those among White children [<xref ref-type="bibr" rid="ref46">46</xref>]. To address these limitations, we recommend incorporating data from diverse health systems, including a broader range of clinically relevant EHR data, exploring additional filtering strategies, and expanding dataset size to enable more detailed subgroup analyses.</p></sec><sec id="s4-4"><title>Conclusion</title><p>Machine learning practitioners should acknowledge the inherent limitations of BC on right-censored outcomes and consider TTE approaches, particularly DTNN, in the clinical context. Our study paves the way for future research to identify and optimize models to improve patient outcomes.</p></sec></sec></body><back><ack><p>This work was supported by the National Institute of Mental Health (K01-MH127309; principal investigator ME) and Eunice Kennedy Shriver National Institute of Child Health and Human Development (NICHD P50HD093074; principal investigator GD).</p></ack><notes><sec><title>Data Availability</title><p>The datasets generated or analyzed during this study are not publicly available due to privacy regulations and ethical considerations related to electronic health records and cannot be shared.</p></sec></notes><fn-group><fn fn-type="con"><p>DRL completed all analyses, drafted the initial paper, and revised this paper. ME conceptualized this study, provided feedback on analyses, and reviewed and revised this paper. EH, NL, and GD provided feedback on analyses, and reviewed and revised this paper. 
All authors contributed to the study design and concept. All authors approved the final paper as submitted and agree to be accountable for all aspects of the work.</p></fn><fn fn-type="conflict"><p>GD is on the Scientific Advisory Board of Tris Pharma, Inc, and the Nonverbal Learning Disability Project and received book royalties from Guilford Press and Springer Nature Press.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">ADHD</term><def><p>attention-deficit/hyperactivity disorder</p></def></def-item><def-item><term id="abb2">AP</term><def><p>average precision</p></def></def-item><def-item><term id="abb3">AP<sub>t</sub></term><def><p>time-varying average precision</p></def></def-item><def-item><term id="abb4">AUC</term><def><p>area under the receiver operating characteristic curve</p></def></def-item><def-item><term id="abb5">AUC<sub>t</sub></term><def><p>time-varying area under the receiver operating characteristic curve</p></def></def-item><def-item><term id="abb6">BC</term><def><p>binary classification</p></def></def-item><def-item><term id="abb7">DCPH</term><def><p>deep Cox proportional hazards</p></def></def-item><def-item><term id="abb8">DTNN</term><def><p>discrete-time neural network</p></def></def-item><def-item><term id="abb9">DUHS</term><def><p>Duke University Health System</p></def></def-item><def-item><term id="abb10">EHR</term><def><p>electronic health record</p></def></def-item><def-item><term id="abb11">FA</term><def><p>food allergy</p></def></def-item><def-item><term id="abb12"><italic>ICD-10</italic></term><def><p><italic>International Classification of Diseases, Tenth Revision</italic></p></def></def-item><def-item><term id="abb13">OOD</term><def><p>out-of-distribution</p></def></def-item><def-item><term id="abb14">PACE</term><def><p>Protected Analytics Computing Environment</p></def></def-item><def-item><term id="abb15">ROM</term><def><p>recurrent otitis media</p></def></def-item><def-item><term 
id="abb16">t</term><def><p>t denotes follow-up length</p></def></def-item><def-item><term id="abb17">TTE</term><def><p>time-to-event</p></def></def-item><def-item><term id="abb18">YOB</term><def><p>year-of-birth</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rajkomar</surname><given-names>A</given-names> </name><name name-style="western"><surname>Oren</surname><given-names>E</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>K</given-names> </name><etal/></person-group><article-title>Scalable and accurate deep learning with electronic health records</article-title><source>NPJ Digit Med</source><year>2018</year><volume>1</volume><issue>1</issue><fpage>18</fpage><pub-id pub-id-type="doi">10.1038/s41746-018-0029-1</pub-id><pub-id pub-id-type="medline">31304302</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Stajduhar</surname><given-names>I</given-names> </name><name name-style="western"><surname>Dalbelo-Basi&#x0107;</surname><given-names>B</given-names> </name><name name-style="western"><surname>Bogunovi&#x0107;</surname><given-names>N</given-names> </name></person-group><article-title>Impact of censoring on learning Bayesian networks in survival modelling</article-title><source>Artif Intell Med</source><year>2009</year><month>11</month><volume>47</volume><issue>3</issue><fpage>199</fpage><lpage>217</lpage><pub-id pub-id-type="doi">10.1016/j.artmed.2009.08.001</pub-id><pub-id pub-id-type="medline">19833488</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Weber</surname><given-names>GM</given-names> </name><name 
name-style="western"><surname>Adams</surname><given-names>WG</given-names> </name><name name-style="western"><surname>Bernstam</surname><given-names>EV</given-names> </name><etal/></person-group><article-title>Biases introduced by filtering electronic health records for patients with &#x201C;complete data&#x201D;</article-title><source>J Am Med Inform Assoc</source><year>2017</year><month>11</month><day>1</day><volume>24</volume><issue>6</issue><fpage>1134</fpage><lpage>1141</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocx071</pub-id><pub-id pub-id-type="medline">29016972</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ishwaran</surname><given-names>H</given-names> </name><name name-style="western"><surname>Kogalur</surname><given-names>UB</given-names> </name><name name-style="western"><surname>Blackstone</surname><given-names>EH</given-names> </name><name name-style="western"><surname>Lauer</surname><given-names>MS</given-names> </name></person-group><article-title>Random survival forests</article-title><source>Ann Appl Stat</source><year>2008</year><volume>2</volume><issue>3</issue><fpage>841</fpage><lpage>860</lpage><pub-id pub-id-type="doi">10.1214/08-AOAS169</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ibrahim</surname><given-names>N</given-names> </name><name name-style="western"><surname>Kudus</surname><given-names>A</given-names> </name><name name-style="western"><surname>Daud</surname><given-names>I</given-names> </name><name name-style="western"><surname>Bakar</surname><given-names>M</given-names> </name></person-group><article-title>Decision tree for competing risks survival probability in breast cancer study</article-title><source>Int J Biol Med 
Sci</source><year>2008</year><volume>3</volume><fpage>25</fpage><lpage>29</lpage><pub-id pub-id-type="doi">10.5281/zenodo.1078975</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bandyopadhyay</surname><given-names>S</given-names> </name><name name-style="western"><surname>Wolfson</surname><given-names>J</given-names> </name><name name-style="western"><surname>Vock</surname><given-names>DM</given-names> </name><etal/></person-group><article-title>Data mining for censored time-to-event data: a Bayesian network model for predicting cardiovascular risk from electronic health record data</article-title><source>Data Min Knowl Disc</source><year>2015</year><month>07</month><volume>29</volume><issue>4</issue><fpage>1033</fpage><lpage>1069</lpage><pub-id pub-id-type="doi">10.1007/s10618-014-0386-6</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Brownstein</surname><given-names>NC</given-names> </name><name name-style="western"><surname>Bunn</surname><given-names>V</given-names> </name><name name-style="western"><surname>Castro</surname><given-names>LM</given-names> </name><name name-style="western"><surname>Sinha</surname><given-names>D</given-names> </name></person-group><article-title>Bayesian analysis of survival data with missing censoring indicators</article-title><source>Biometrics</source><year>2021</year><month>03</month><volume>77</volume><issue>1</issue><fpage>305</fpage><lpage>315</lpage><pub-id pub-id-type="doi">10.1111/biom.13280</pub-id><pub-id pub-id-type="medline">32282929</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cox</surname><given-names>DR</given-names> 
</name></person-group><article-title>Regression models and life-tables</article-title><source>J R Stat Soc Ser B</source><year>1972</year><month>01</month><day>1</day><volume>34</volume><issue>2</issue><fpage>187</fpage><lpage>202</lpage><pub-id pub-id-type="doi">10.1111/j.2517-6161.1972.tb00899.x</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Biganzoli</surname><given-names>E</given-names> </name><name name-style="western"><surname>Boracchi</surname><given-names>P</given-names> </name><name name-style="western"><surname>Mariani</surname><given-names>L</given-names> </name><name name-style="western"><surname>Marubini</surname><given-names>E</given-names> </name></person-group><article-title>Feed forward neural networks for the analysis of censored survival data: a partial logistic regression approach</article-title><source>Stat Med</source><year>1998</year><month>05</month><day>30</day><volume>17</volume><issue>10</issue><fpage>1169</fpage><lpage>1186</lpage><pub-id pub-id-type="doi">10.1002/(sici)1097-0258(19980530)17:10&#x003C;1169::aid-sim796&#x003E;3.0.co;2-d</pub-id><pub-id pub-id-type="medline">9618776</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gensheimer</surname><given-names>MF</given-names> </name><name name-style="western"><surname>Narasimhan</surname><given-names>B</given-names> </name></person-group><article-title>A scalable discrete-time survival model for neural networks</article-title><source>PeerJ</source><year>2019</year><volume>7</volume><fpage>e6257</fpage><pub-id pub-id-type="doi">10.7717/peerj.6257</pub-id><pub-id pub-id-type="medline">30701130</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Vock</surname><given-names>DM</given-names> </name><name name-style="western"><surname>Wolfson</surname><given-names>J</given-names> </name><name name-style="western"><surname>Bandyopadhyay</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Adapting machine learning techniques to censored time-to-event health record data: a general-purpose approach using inverse probability of censoring weighting</article-title><source>J Biomed Inform</source><year>2016</year><month>06</month><volume>61</volume><fpage>119</fpage><lpage>131</lpage><pub-id pub-id-type="doi">10.1016/j.jbi.2016.03.009</pub-id><pub-id pub-id-type="medline">26992568</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Solares</surname><given-names>JRA</given-names> </name><name name-style="western"><surname>Raimondi</surname><given-names>FED</given-names> </name><name name-style="western"><surname>Zhu</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Deep learning for electronic health records: a comparative review of multiple deep neural architectures</article-title><source>J Biomed Inform</source><year>2020</year><month>01</month><volume>101</volume><fpage>103337</fpage><pub-id pub-id-type="doi">10.1016/j.jbi.2019.103337</pub-id><pub-id pub-id-type="medline">31916973</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Katzman</surname><given-names>JL</given-names> </name><name name-style="western"><surname>Shaham</surname><given-names>U</given-names> </name><name name-style="western"><surname>Cloninger</surname><given-names>A</given-names> </name><name name-style="western"><surname>Bates</surname><given-names>J</given-names> 
</name><name name-style="western"><surname>Jiang</surname><given-names>T</given-names> </name><name name-style="western"><surname>Kluger</surname><given-names>Y</given-names> </name></person-group><article-title>DeepSurv: personalized treatment recommender system using a Cox proportional hazards deep neural network</article-title><source>BMC Med Res Methodol</source><year>2018</year><month>02</month><day>26</day><volume>18</volume><issue>1</issue><fpage>24</fpage><pub-id pub-id-type="doi">10.1186/s12874-018-0482-1</pub-id><pub-id pub-id-type="medline">29482517</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Engelhard</surname><given-names>M</given-names> </name><name name-style="western"><surname>Henao</surname><given-names>R</given-names> </name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Camps-Valls</surname><given-names>G</given-names> </name><name name-style="western"><surname>Ruiz</surname><given-names>FJR</given-names> </name><name name-style="western"><surname>Valera</surname><given-names>I</given-names> </name></person-group><article-title>Disentangling whether from when in a neural mixture cure model for failure time data</article-title><source>Proceedings of The 25th Int Conf Artif Intell Stat, PMLR</source><year>2022</year><access-date>2025-03-12</access-date><volume>151</volume><fpage>9571</fpage><lpage>9581</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://proceedings.mlr.press/v151/engelhard22a.html">https://proceedings.mlr.press/v151/engelhard22a.html</ext-link></comment><pub-id pub-id-type="medline">35937033</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="web"><article-title>LongHorizonDiagnosis</article-title><source>GitHub</source><access-date>2025-03-12</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://github.com/engelhard-lab/LongHorizonDiagnosis">https://github.com/engelhard-lab/LongHorizonDiagnosis</ext-link></comment></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Stolte</surname><given-names>A</given-names> </name><name name-style="western"><surname>Merli</surname><given-names>MG</given-names> </name><name name-style="western"><surname>Hurst</surname><given-names>JH</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Wood</surname><given-names>CT</given-names> </name><name name-style="western"><surname>Goldstein</surname><given-names>BA</given-names> </name></person-group><article-title>Using electronic health records to understand the population of local children captured in a large health system in Durham County, NC, USA, and implications for population health research</article-title><source>Soc Sci Med</source><year>2022</year><month>03</month><volume>296</volume><fpage>114759</fpage><pub-id pub-id-type="doi">10.1016/j.socscimed.2022.114759</pub-id><pub-id pub-id-type="medline">35180593</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Engelhard</surname><given-names>MM</given-names> </name><name name-style="western"><surname>Henao</surname><given-names>R</given-names> </name><name name-style="western"><surname>Berchuck</surname><given-names>SI</given-names> </name><etal/></person-group><article-title>Predictive value of early autism detection models based on electronic health record data collected before age 1 year</article-title><source>JAMA Netw Open</source><year>2023</year><month>02</month><day>1</day><volume>6</volume><issue>2</issue><fpage>e2254303</fpage><pub-id 
pub-id-type="doi">10.1001/jamanetworkopen.2022.54303</pub-id><pub-id pub-id-type="medline">36729455</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="web"><article-title>RxNorm</article-title><source>US National Library of Medicine</source><access-date>2025-03-12</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.nlm.nih.gov/research/umls/rxnorm/index.html">https://www.nlm.nih.gov/research/umls/rxnorm/index.html</ext-link></comment></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="web"><article-title>CPT</article-title><source>American Medical Association</source><year>2024</year><month>08</month><day>23</day><access-date>2025-03-12</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.ama-assn.org/practice-management/cpt">https://www.ama-assn.org/practice-management/cpt</ext-link></comment></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="web"><article-title>LOINC</article-title><source>Regenstrief Institute</source><access-date>2025-03-12</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://loinc.org/">https://loinc.org/</ext-link></comment></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Kvamme</surname><given-names>H</given-names> </name><name name-style="western"><surname>Hart</surname><given-names>B</given-names> </name><name name-style="western"><surname>Pati</surname><given-names>S</given-names> </name><name name-style="western"><surname>Sellereite</surname><given-names>N</given-names> </name></person-group><article-title>pycox</article-title><source>GitHub</source><year>2022</year><month>01</month><access-date>2025-03-12</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://github.com/havakv/pycox.git">https://github.com/havakv/pycox.git</ext-link></comment></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pedregosa</surname><given-names>F</given-names> </name><name name-style="western"><surname>Varoquaux</surname><given-names>G</given-names> </name><name name-style="western"><surname>Gramfort</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Scikit-learn: machine learning in Python</article-title><source>J Mach Learn Res</source><year>2011</year><access-date>2025-03-12</access-date><volume>12</volume><fpage>2825</fpage><lpage>2830</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://www.jmlr.org/papers/volume12/pedregosa11a/pedregosa11a.pdf?source=post_page">https://www.jmlr.org/papers/volume12/pedregosa11a/pedregosa11a.pdf?source=post_page</ext-link></comment></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>D&#x2019;Agostino</surname><given-names>RB</given-names> </name><name name-style="western"><surname>Nam</surname><given-names>BH</given-names> </name></person-group><article-title>Evaluation of the performance of survival analysis models: discrimination and calibration measures</article-title><source>Handbook of Statistics</source><year>2003</year><volume>23</volume><publisher-name>Elsevier</publisher-name><fpage>1</fpage><lpage>25</lpage><pub-id pub-id-type="doi">10.1016/S0169-7161(03)23001-7</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Antolini</surname><given-names>L</given-names> </name><name name-style="western"><surname>Boracchi</surname><given-names>P</given-names> </name><name 
name-style="western"><surname>Biganzoli</surname><given-names>E</given-names> </name></person-group><article-title>A time-dependent discrimination index for survival data</article-title><source>Stat Med</source><year>2005</year><month>12</month><day>30</day><volume>24</volume><issue>24</issue><fpage>3927</fpage><lpage>3944</lpage><pub-id pub-id-type="doi">10.1002/sim.2427</pub-id><pub-id pub-id-type="medline">16320281</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Longato</surname><given-names>E</given-names> </name><name name-style="western"><surname>Vettoretti</surname><given-names>M</given-names> </name><name name-style="western"><surname>Di Camillo</surname><given-names>B</given-names> </name></person-group><article-title>A practical perspective on the concordance index for the evaluation and selection of prognostic time-to-event models</article-title><source>J Biomed Inform</source><year>2020</year><month>08</month><volume>108</volume><fpage>103496</fpage><pub-id pub-id-type="doi">10.1016/j.jbi.2020.103496</pub-id><pub-id pub-id-type="medline">32652236</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>El Emam</surname><given-names>K</given-names> </name><name name-style="western"><surname>Leung</surname><given-names>TI</given-names> </name><name name-style="western"><surname>Malin</surname><given-names>B</given-names> </name><name name-style="western"><surname>Klement</surname><given-names>W</given-names> </name><name name-style="western"><surname>Eysenbach</surname><given-names>G</given-names> </name></person-group><article-title>Consolidated Reporting Guidelines for Prognostic and Diagnostic Machine Learning Models (CREMLS)</article-title><source>J Med Internet 
Res</source><year>2024</year><month>05</month><day>2</day><volume>26</volume><fpage>e52508</fpage><pub-id pub-id-type="doi">10.2196/52508</pub-id><pub-id pub-id-type="medline">38696776</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Barrajon</surname><given-names>E</given-names> </name><name name-style="western"><surname>Barrajon</surname><given-names>L</given-names> </name></person-group><article-title>Effect of right censoring bias on survival analysis</article-title><source>JCO</source><year>2019</year><month>05</month><day>20</day><volume>37</volume><issue>15_suppl</issue><fpage>e18188</fpage><pub-id pub-id-type="doi">10.1200/JCO.2019.37.15_suppl.e18188</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Aalen</surname><given-names>OO</given-names> </name><name name-style="western"><surname>Gjessing</surname><given-names>HK</given-names> </name></person-group><article-title>Understanding the shape of the hazard rate: a process point of view (with comments and a rejoinder by the authors)</article-title><source>Statist Sci</source><year>2001</year><volume>16</volume><issue>1</issue><fpage>1</fpage><lpage>22</lpage><pub-id pub-id-type="doi">10.1214/ss/998929473</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yuan</surname><given-names>J</given-names> </name><name name-style="western"><surname>Li</surname><given-names>M</given-names> </name><name name-style="western"><surname>Lu</surname><given-names>ZK</given-names> </name></person-group><article-title>Racial/ethnic disparities in the prevalence and trends of autism spectrum disorder in US children and 
adolescents</article-title><source>JAMA Netw Open</source><year>2021</year><month>03</month><day>1</day><volume>4</volume><issue>3</issue><fpage>e210771</fpage><pub-id pub-id-type="doi">10.1001/jamanetworkopen.2021.0771</pub-id><pub-id pub-id-type="medline">33666658</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Maenner</surname><given-names>MJ</given-names> </name><name name-style="western"><surname>Warren</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Williams</surname><given-names>AR</given-names> </name></person-group><article-title>Prevalence and characteristics of autism spectrum disorder among children aged 8 years &#x2014; autism and developmental disabilities monitoring network, 11 sites, United States, 2020</article-title><source>MMWR Surveill Summ</source><year>2023</year><volume>72</volume><issue>2</issue><fpage>1</fpage><lpage>14</lpage><pub-id pub-id-type="doi">10.15585/mmwr.ss7202a1</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Xu</surname><given-names>G</given-names> </name><name name-style="western"><surname>Strathearn</surname><given-names>L</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>B</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>B</given-names> </name><name name-style="western"><surname>Bao</surname><given-names>W</given-names> </name></person-group><article-title>Twenty-year trends in diagnosed attention-deficit/hyperactivity disorder among US children and adolescents, 1997-2016</article-title><source>JAMA Netw Open</source><year>2018</year><month>08</month><day>3</day><volume>1</volume><issue>4</issue><fpage>e181471</fpage><pub-id 
pub-id-type="doi">10.1001/jamanetworkopen.2018.1471</pub-id><pub-id pub-id-type="medline">30646132</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kaur</surname><given-names>R</given-names> </name><name name-style="western"><surname>Morris</surname><given-names>M</given-names> </name><name name-style="western"><surname>Pichichero</surname><given-names>ME</given-names> </name></person-group><article-title>Epidemiology of acute otitis media in the postpneumococcal conjugate vaccine era</article-title><source>Pediatrics</source><year>2017</year><month>09</month><volume>140</volume><issue>3</issue><fpage>e20170181</fpage><pub-id pub-id-type="doi">10.1542/peds.2017-0181</pub-id><pub-id pub-id-type="medline">28784702</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Allen</surname><given-names>DZ</given-names> </name><name name-style="western"><surname>Challapalli</surname><given-names>S</given-names> </name><name name-style="western"><surname>McKee</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Impact of COVID-19 on nationwide pediatric otolaryngology: otitis media and myringotomy tube trends</article-title><source>Am J Otolaryngol</source><year>2022</year><volume>43</volume><issue>2</issue><fpage>103369</fpage><pub-id pub-id-type="doi">10.1016/j.amjoto.2021.103369</pub-id><pub-id pub-id-type="medline">35033925</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Smith</surname><given-names>DF</given-names> </name><name name-style="western"><surname>Boss</surname><given-names>EF</given-names> </name></person-group><article-title>Racial/ethnic and socioeconomic 
disparities in the prevalence and treatment of otitis media in children in the United States</article-title><source>Laryngoscope</source><year>2010</year><month>11</month><volume>120</volume><issue>11</issue><fpage>2306</fpage><lpage>2312</lpage><pub-id pub-id-type="doi">10.1002/lary.21090</pub-id><pub-id pub-id-type="medline">20939071</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Patel</surname><given-names>S</given-names> </name><name name-style="western"><surname>Schroeder</surname><given-names>JW</given-names> </name></person-group><article-title>Disparities in children with otitis media: the effect of insurance status</article-title><source>Otolaryngol Head Neck Surg</source><year>2011</year><volume>144</volume><issue>1</issue><fpage>73</fpage><lpage>77</lpage><pub-id pub-id-type="doi">10.1177/0194599810391428</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nieman</surname><given-names>CL</given-names> </name><name name-style="western"><surname>Tunkel</surname><given-names>DE</given-names> </name><name name-style="western"><surname>Boss</surname><given-names>EF</given-names> </name></person-group><article-title>Do race/ethnicity or socioeconomic status affect why we place ear tubes in children?</article-title><source>Int J Pediatr Otorhinolaryngol</source><year>2016</year><month>09</month><volume>88</volume><fpage>98</fpage><lpage>103</lpage><pub-id pub-id-type="doi">10.1016/j.ijporl.2016.06.029</pub-id><pub-id pub-id-type="medline">27497394</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dunlop</surname><given-names>JH</given-names> </name><name 
name-style="western"><surname>Keet</surname><given-names>CA</given-names> </name></person-group><article-title>Epidemiology of food allergy</article-title><source>Immunol Allergy Clin North Am</source><year>2018</year><month>02</month><volume>38</volume><issue>1</issue><fpage>13</fpage><lpage>25</lpage><pub-id pub-id-type="doi">10.1016/j.iac.2017.09.002</pub-id><pub-id pub-id-type="medline">29132669</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tang</surname><given-names>MLK</given-names> </name><name name-style="western"><surname>Mullins</surname><given-names>RJ</given-names> </name></person-group><article-title>Food allergy: is prevalence increasing?</article-title><source>Intern Med J</source><year>2017</year><month>03</month><volume>47</volume><issue>3</issue><fpage>256</fpage><lpage>261</lpage><pub-id pub-id-type="doi">10.1111/imj.13362</pub-id><pub-id pub-id-type="medline">28260260</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Keet</surname><given-names>CA</given-names> </name><name name-style="western"><surname>Savage</surname><given-names>JH</given-names> </name><name name-style="western"><surname>Seopaul</surname><given-names>S</given-names> </name><name name-style="western"><surname>Peng</surname><given-names>RD</given-names> </name><name name-style="western"><surname>Wood</surname><given-names>RA</given-names> </name><name name-style="western"><surname>Matsui</surname><given-names>EC</given-names> </name></person-group><article-title>Temporal trends and racial/ethnic disparity in self-reported pediatric food allergy in the United States</article-title><source>Ann Allergy Asthma 
Immunol</source><year>2014</year><month>03</month><volume>112</volume><issue>3</issue><fpage>222</fpage><lpage>229</lpage><pub-id pub-id-type="doi">10.1016/j.anai.2013.12.007</pub-id><pub-id pub-id-type="medline">24428971</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bock</surname><given-names>SA</given-names> </name></person-group><article-title>Prospective appraisal of complaints of adverse reactions to foods in children during the first 3 years of life</article-title><source>Pediatrics</source><year>1987</year><month>05</month><volume>79</volume><issue>5</issue><fpage>683</fpage><lpage>688</lpage><pub-id pub-id-type="doi">10.1542/peds.79.5.683</pub-id><pub-id pub-id-type="medline">3575022</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Eggesb&#x00F8;</surname><given-names>M</given-names> </name><name name-style="western"><surname>Botten</surname><given-names>G</given-names> </name><name name-style="western"><surname>Halvorsen</surname><given-names>R</given-names> </name><name name-style="western"><surname>Magnus</surname><given-names>P</given-names> </name></person-group><article-title>The prevalence of CMA/CMPI in young children: the validity of parentally perceived reactions in a population-based study</article-title><source>Allergy</source><year>2001</year><month>05</month><volume>56</volume><issue>5</issue><fpage>393</fpage><lpage>402</lpage><pub-id pub-id-type="doi">10.1034/j.1398-9995.2001.056005393.x</pub-id><pub-id pub-id-type="medline">11350302</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Osborne</surname><given-names>NJ</given-names> </name><name 
name-style="western"><surname>Koplin</surname><given-names>JJ</given-names> </name><name name-style="western"><surname>Martin</surname><given-names>PE</given-names> </name><etal/></person-group><article-title>Prevalence of challenge-proven IgE-mediated food allergy using population-based sampling and predetermined challenge criteria in infants</article-title><source>J Allergy Clin Immunol</source><year>2011</year><month>03</month><volume>127</volume><issue>3</issue><fpage>668</fpage><lpage>676</lpage><pub-id pub-id-type="doi">10.1016/j.jaci.2011.01.039</pub-id><pub-id pub-id-type="medline">21377036</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pali-Sch&#x00F6;ll</surname><given-names>I</given-names> </name><name name-style="western"><surname>Jensen-Jarolim</surname><given-names>E</given-names> </name></person-group><article-title>Gender aspects in food allergy</article-title><source>Curr Opin Allergy Clin Immunol</source><year>2019</year><month>06</month><volume>19</volume><issue>3</issue><fpage>249</fpage><lpage>255</lpage><pub-id pub-id-type="doi">10.1097/ACI.0000000000000529</pub-id><pub-id pub-id-type="medline">30893085</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jiang</surname><given-names>J</given-names> </name><name name-style="western"><surname>Warren</surname><given-names>CM</given-names> </name><name name-style="western"><surname>Brewer</surname><given-names>A</given-names> </name><name name-style="western"><surname>Soffer</surname><given-names>G</given-names> </name><name name-style="western"><surname>Gupta</surname><given-names>RS</given-names> </name></person-group><article-title>Racial, ethnic, and socioeconomic differences in food allergies in the US</article-title><source>JAMA Netw 
Open</source><year>2023</year><month>06</month><day>1</day><volume>6</volume><issue>6</issue><fpage>e2318162</fpage><pub-id pub-id-type="doi">10.1001/jamanetworkopen.2023.18162</pub-id><pub-id pub-id-type="medline">37314805</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bilaver</surname><given-names>LA</given-names> </name><name name-style="western"><surname>Kanaley</surname><given-names>MK</given-names> </name><name name-style="western"><surname>Fierstein</surname><given-names>JL</given-names> </name><name name-style="western"><surname>Gupta</surname><given-names>RS</given-names> </name></person-group><article-title>Prevalence and correlates of food allergy among Medicaid-enrolled United States children</article-title><source>Acad Pediatr</source><year>2021</year><volume>21</volume><issue>1</issue><fpage>84</fpage><lpage>92</lpage><pub-id pub-id-type="doi">10.1016/j.acap.2020.03.005</pub-id><pub-id pub-id-type="medline">32200110</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nevison</surname><given-names>C</given-names> </name><name name-style="western"><surname>Zahorodny</surname><given-names>W</given-names> </name></person-group><article-title>Race/ethnicity-resolved time trends in United States ASD prevalence estimates from IDEA and ADDM</article-title><source>J Autism Dev Disord</source><year>2019</year><month>12</month><volume>49</volume><issue>12</issue><fpage>4721</fpage><lpage>4730</lpage><pub-id pub-id-type="doi">10.1007/s10803-019-04188-6</pub-id><pub-id pub-id-type="medline">31435818</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Additional figures and tables.</p><media xlink:href="ai_v4i1e62985_app1.docx" 
xlink:title="DOCX File, 2326 KB"/></supplementary-material><supplementary-material id="app2"><label>Checklist 1</label><p>CREMLS checklist. CREMLS: Consolidated Reporting of Machine Learning Studies.</p><media xlink:href="ai_v4i1e62985_app2.docx" xlink:title="DOCX File, 22 KB"/></supplementary-material></app-group></back></article>