<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR AI</journal-id><journal-id journal-id-type="publisher-id">ai</journal-id><journal-id journal-id-type="index">41</journal-id><journal-title>JMIR AI</journal-title><abbrev-journal-title>JMIR AI</abbrev-journal-title><issn pub-type="epub">2817-1705</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v4i1e64519</article-id><article-id pub-id-type="doi">10.2196/64519</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Machine-Learning Predictive Tool for the Individualized Prediction of Outcomes of Hematopoietic Cell Transplantation for Sickle Cell Disease: Registry-Based Study</article-title></title-group><contrib-group><contrib contrib-type="author"><name name-style="western"><surname>Chandrasekar</surname><given-names>Rajagopal Subramaniam</given-names></name><degrees>MTech</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Kane</surname><given-names>Michael</given-names></name><degrees>PhD, MA, MS</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Krishnamurti</surname><given-names>Lakshmanan</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib></contrib-group><aff id="aff1"><institution>Section of Pediatric 
Hematology/Oncology/BMT, Yale School of Medicine</institution><addr-line>2073 A, LMP Building 330 Cedar Street</addr-line><addr-line>New Haven</addr-line><addr-line>CT</addr-line><country>United States</country></aff><aff id="aff2"><institution>School of Data Science and AI, Indian Institute of Technology, Madras</institution><addr-line>Chennai</addr-line><country>India</country></aff><aff id="aff3"><institution>Department of Lymphoma-Myeloma, The University of Texas MD Anderson Cancer Center</institution><addr-line>Houston</addr-line><addr-line>TX</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Emam</surname><given-names>Khaled El</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Singh</surname><given-names>Deepak</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Takahashi</surname><given-names>Takuto</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Lakshmanan Krishnamurti, MD, Section of Pediatric Hematology/Oncology/BMT, Yale School of Medicine, 2073 A, LMP Building 330 Cedar Street, New Haven, CT, United States, 1 412-612-4761; <email>lakshmanan.krishnamurti@yale.edu</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>15</day><month>9</month><year>2025</year></pub-date><volume>4</volume><elocation-id>e64519</elocation-id><history><date date-type="received"><day>21</day><month>07</month><year>2024</year></date><date date-type="rev-recd"><day>20</day><month>04</month><year>2025</year></date><date date-type="accepted"><day>09</day><month>06</month><year>2025</year></date></history><copyright-statement>&#x00A9; Rajagopal Subramaniam Chandrasekar, Michael Kane, Lakshmanan Krishnamurti. 
Originally published in JMIR AI (<ext-link ext-link-type="uri" xlink:href="https://ai.jmir.org">https://ai.jmir.org</ext-link>), 15.9.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR AI, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://www.ai.jmir.org/">https://www.ai.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://ai.jmir.org/2025/1/e64519"/><abstract><sec><title>Background</title><p>Disease-modifying therapies ameliorate disease severity of sickle cell disease (SCD), but hematopoietic cell transplantation (HCT), and more recently, autologous gene therapy are the only treatments that have curative potential for SCD. While registry-based studies provide population-level estimates, they do not address the uncertainty regarding individual outcomes of HCT. Computational machine learning (ML) has the potential to identify generalizable predictive patterns and quantify uncertainty in estimates, thereby improving clinical decision-making. 
There is no existing ML model for SCD, and ML models for HCT for other diseases focus on single outcomes rather than all relevant outcomes.</p></sec><sec><title>Objective</title><p>This study aims to address the existing knowledge gap by developing and validating an individualized ML prediction model SPRIGHT (Sickle Cell Predicting Outcomes of Hematopoietic Cell Transplantation), incorporating multiple relevant pre-HCT features to make predictions of key post-HCT clinical outcomes.</p></sec><sec sec-type="methods"><title>Methods</title><p>We applied a supervised random forest ML model to clinical parameters in a deidentified Center for International Blood and Marrow Transplant Research (CIBMTR) dataset of 1641 patients who underwent HCT between 1991 and 2021 and were followed for a median of 42.5 (IQR 52.5; range 0.3&#x2010;312.9) months. We applied forward and reverse feature selection methods to optimize a set of predictive variables. To counter the imbalance bias toward predicting positive outcomes due to the small number of negative outcomes, we constructed a training dataset, taking each outcome as the variable of interest, and performed 2-times repeated 10-fold cross-validation. SPRIGHT is a web-based individualized prediction tool accessible by smartphone, tablet, or personal computer. It incorporates predictive variables of age, age group, Karnofsky or Lansky score, comorbidity index, recipient cytomegalovirus seropositivity, history of acute chest syndrome, need for exchange transfusion, occurrence and frequency of vaso-occlusive crisis (VOC) before HCT, and either a published or custom chemotherapy or radiation conditioning, serotherapy, and graft-versus-host disease prophylaxis. 
SPRIGHT makes individualized predictions of overall survival (OS), event-free survival, graft failure, acute graft-versus-host disease (AGVHD), chronic graft-versus-host disease (CGVHD), and occurrence of VOC or stroke post-HCT.</p></sec><sec sec-type="results"><title>Results</title><p>The model's ability to distinguish between positive and negative classes, that is, discrimination, was evaluated using the area under the curve, accuracy, and balanced accuracy. Discrimination met or exceeded published predictive benchmarks with area under the curve for OS (0.7925), event-free survival (0.7900), graft failure (0.8024), acute graft-versus-host disease (0.6793), chronic graft-versus-host disease (0.7320), and VOC post-HCT (0.8779). SPRIGHT revealed good calibration with a slope of 0.87&#x2010;0.96, with small negative intercepts (&#x2013;0.01 to &#x2013;0.03), for 4 out of the 5 outcomes. However, OS exhibits nonideal calibration, which may be reflective of the overall high OS in all subgroups.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>A web-based ML prediction tool incorporating multiple clinically relevant variables predicts key clinical outcomes with a high level of discrimination and calibration and has potential in shared decision-making.</p></sec></abstract><kwd-group><kwd>sickle cell disease</kwd><kwd>SCD</kwd><kwd>prediction algorithms</kwd><kwd>hematopoietic stem cell transplantation</kwd><kwd>machine learning</kwd><kwd>ML</kwd><kwd>predictive tool</kwd><kwd>prediction</kwd><kwd>hematopoietic cell transplantation</kwd><kwd>HCT</kwd><kwd>hematopoietic cell</kwd><kwd>registry-based study</kwd><kwd>clinical decision-making</kwd><kwd>prediction model</kwd><kwd>clinical outcomes</kwd><kwd>gene therapy</kwd><kwd>shared decision-making</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>The complications of sickle cell disease (SCD) can be prevented or ameliorated by disease-modifying therapies 
[<xref ref-type="bibr" rid="ref1">1</xref>], but hematopoietic cell transplantation (HCT), and more recently, gene therapy remain the only therapeutic options with curative intent [<xref ref-type="bibr" rid="ref2">2</xref>-<xref ref-type="bibr" rid="ref12">12</xref>]. Population-level studies demonstrate the association of outcomes of HCT with age, type of donor, type of conditioning, and graft-versus-host disease (GVHD) prophylaxis [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>], but do not address the uncertainty regarding individualized outcomes of HCT. Such uncertainty contributes to the decisional dilemma and is a barrier to shared decision-making. An individualized prediction model that incorporates all predictive variables and provides individualized estimates of key outcomes of HCT of interest to patients and their physicians has the potential to inform shared decision-making [<xref ref-type="bibr" rid="ref13">13</xref>-<xref ref-type="bibr" rid="ref15">15</xref>]. Brazauskas et al [<xref ref-type="bibr" rid="ref16">16</xref>] have proposed a predictive model based on the age of the recipient and the type of donor. However, their model does not incorporate other clinically relevant patient, HCT, and disease characteristics and does not include all key outcomes. Computational machine learning (ML) has the potential to determine generalizable predictive patterns and quantify uncertainty, but published ML predictive models for HCT are limited to predicting single clinical outcomes [<xref ref-type="bibr" rid="ref17">17</xref>-<xref ref-type="bibr" rid="ref30">30</xref>]. 
To address the knowledge gap, we developed and described the initial validation of SPRIGHT (Sickle Cell Predicting Outcomes of Hematopoietic Cell Transplantation), an individualized ML prediction model for outcomes of HCT for SCD, incorporating multiple relevant features to make predictions of key clinical outcomes.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Dataset</title><p>We developed SPRIGHT using an anonymized HCT for SCD dataset [<xref ref-type="bibr" rid="ref31">31</xref>] derived from data submitted to the Center for International Blood and Marrow Transplant Research (CIBMTR) registry on children and adults undergoing HCT for SCD between 1991 and 2021 in the United States. The dataset was obtained through the NHLBI (National Heart, Lung, and Blood Institute) Biologic Specimen and Data Repository Information Coordinating Center (BioLINCC) [<xref ref-type="bibr" rid="ref32">32</xref>].</p><p>The CIBMTR maintains a research database to serve as a comprehensive data source that can be used to study cellular therapies, including HCT. All US transplant centers are required to submit outcomes data on all allogeneic transplants when either the stem cell donation or the transplant occurs within the United States. CIBMTR assigns patients to either a Transplant Essential Data (TED) track, which collects core transplant data, or a Comprehensive Report Form (CRF) track that captures detailed disease- and treatment-related data [<xref ref-type="bibr" rid="ref31">31</xref>]. Assignment to each track is based on submission of the initial pretransplantation TED form (Form 2400) and uses a weighted randomization algorithm designed to produce a cohort representative of current clinical practice. 
All centers submit a Pre-HCT TED Form (Form 2400) for each allogeneic (related or unrelated) HCT.</p><p>Of 1641 patients undergoing HCT for SCD between 1991 and 2021, on whom data were submitted to CIBMTR, detailed CRFs were submitted on 763 patients. Of the patients in the dataset, 84% (1377/1641) had undergone HCT after 2007. We performed the imputation of missing data using MissForest, an ML data imputation algorithm that operates on random forest (RF).</p></sec><sec id="s2-2"><title>Feature Selection</title><p>We identified overall survival (OS), event-free survival (EFS), graft failure (GF), acute graft-versus-host disease (AGVHD), and chronic graft-versus-host disease (CGVHD) as key outcomes. We used wrapper methods of backward feature elimination (BFE) and forward sequential selection (FSS) to select and optimize the input variables [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref34">34</xref>]. The BFE procedure begins with a complete set of features and a chosen ML model. The model is trained, and the importance of each feature is evaluated based on the model&#x2019;s coefficients or feature importance scores, and then the least important feature is discarded, and the model is retrained on the remaining features. This process is repeated until a predetermined number of features is reached or until further removal of features leads to a significant decrease in model performance. Using FSS, we incrementally built a feature set starting with an empty model, sequentially adding a feature that most improves the model performance at each step, as evaluated through a predefined metric like cross-validation score. We continued this stepwise addition until new features no longer significantly enhanced the model or a specified number of features was reached. 
We have included the detailed descriptive statistics and missingness for each of the selected features and outcomes of interest in Table S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> and Table S4 in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p></sec><sec id="s2-3"><title>Model Design</title><p>The model was designed by subsampling the majority class to create subtraining datasets, followed by pooling and thresholding to obtain the final prediction [<xref ref-type="bibr" rid="ref35">35</xref>]. We determined the discriminative performance of the model, which refers to how well the predictions can separate between 2 groups of participants, that is, those with or without an outcome. Discrimination was quantified by the concordance (c) statistic (index), which for binary outcomes, is equivalent to the area under the curve (AUC). We also assessed accuracy, that is, the percentage of correct predictions out of all predictions, correct or incorrect, in the model. We also assessed balanced accuracy, which is an accuracy adjusted for imbalance and is derived by averaging sensitivity and specificity, so that each class&#x2019;s importance is equal. We compared RF, extreme gradient boosting, logistic regression, Naive Bayes, AdaBoost, and support vector classification algorithms (<xref ref-type="table" rid="table1">Table 1</xref>). The HCT for SCD dataset spans children and adults undergoing HCT between 1991 and 2021. During this time, there have been many changes in conditioning regimens and improvements in supportive care. Gluckman et al [<xref ref-type="bibr" rid="ref6">6</xref>] reported that EFS was higher in patients who underwent HCT in or after 2007 as compared to those who underwent HCT in or before 2006 (HR [hazard ratio] 0.95, CI 0.90&#x2010;0.99; <italic>P</italic>=.01). 
To determine if the model performed consistently across eras, we tested model performance in patients with the year of HCT &#x003C;2007 versus HCT &#x2265;2007. We also tested model performance across age at HCT &#x003C;10, &#x003C;18, and &#x003E;18 years, respectively, including outcomes at 1 and 3 years post-HCT (<xref ref-type="table" rid="table2">Table 2</xref>).</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Model performance: comparison of area under the curve of different algorithms for each outcome.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" rowspan="2">Model</td><td align="left" valign="bottom" colspan="5">Outcome of interest</td></tr><tr><td align="left" valign="bottom">EFS<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup></td><td align="left" valign="bottom">OS<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup></td><td align="left" valign="bottom">GF<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup></td><td align="left" valign="bottom">AGVHD<sup><xref ref-type="table-fn" rid="table1fn4">d</xref></sup></td><td align="left" valign="bottom">CGVHD<sup><xref ref-type="table-fn" rid="table1fn5">e</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top">Random forest entire dataset</td><td align="char" char="." valign="top">0.7900</td><td align="char" char="." valign="top">0.7925</td><td align="char" char="." valign="top">0.8024</td><td align="char" char="." valign="top">0.6793</td><td align="char" char="." valign="top">0.7320</td></tr><tr><td align="left" valign="top">XGBoost<sup><xref ref-type="table-fn" rid="table1fn6">f</xref></sup></td><td align="char" char="." valign="top">0.7754</td><td align="char" char="." valign="top">0.7785</td><td align="char" char="." valign="top">0.7948</td><td align="char" char="." valign="top">0.6731</td><td align="char" char="." 
valign="top">0.7230</td></tr><tr><td align="left" valign="top">Logistic regression</td><td align="char" char="." valign="top">0.7464</td><td align="char" char="." valign="top">0.7835</td><td align="char" char="." valign="top">0.7578</td><td align="char" char="." valign="top">0.6925</td><td align="char" char="." valign="top">0.7019</td></tr><tr><td align="left" valign="top">Na&#x00EF;ve Bayes</td><td align="char" char="." valign="top">0.6930</td><td align="char" char="." valign="top">0.7111</td><td align="char" char="." valign="top">0.7107</td><td align="char" char="." valign="top">0.6386</td><td align="char" char="." valign="top">0.6384</td></tr><tr><td align="left" valign="top">Adaboost</td><td align="char" char="." valign="top">0.7452</td><td align="char" char="." valign="top">0.7806</td><td align="char" char="." valign="top">0.7561</td><td align="char" char="." valign="top">0.6934</td><td align="char" char="." valign="top">0.7005</td></tr><tr><td align="left" valign="top">Support vector classifier</td><td align="char" char="." valign="top">0.7357</td><td align="char" char="." valign="top">0.7810</td><td align="char" char="." valign="top">0.7561</td><td align="char" char="." valign="top">0.6841</td><td align="char" char="." 
valign="top">0.7061</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>EFS: event-free survival.</p></fn><fn id="table1fn2"><p><sup>b</sup>OS: overall survival.</p></fn><fn id="table1fn3"><p><sup>c</sup>GF: graft failure.</p></fn><fn id="table1fn4"><p><sup>d</sup>AGVHD: acute graft-versus-host disease.</p></fn><fn id="table1fn5"><p><sup>e</sup>CGVHD: chronic graft-versus-host disease.</p></fn><fn id="table1fn6"><p><sup>f</sup>XGBoost: extreme gradient boosting.</p></fn></table-wrap-foot></table-wrap><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Model performance: Comparison of area under the curve across different paradigms and data time periods.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" rowspan="2">Paradigm</td><td align="left" valign="bottom" colspan="5">Outcome of interest</td></tr><tr><td align="left" valign="bottom">EFS<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></td><td align="left" valign="bottom">OS<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup></td><td align="left" valign="bottom">GF<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup></td><td align="left" valign="bottom">AGVHD<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></td><td align="left" valign="bottom">CGVHD<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top">All data</td><td align="char" char="." valign="top">0.790</td><td align="char" char="." valign="top">0.793</td><td align="char" char="." valign="top">0.802</td><td align="char" char="." valign="top">0.679</td><td align="char" char="." valign="top">0.732</td></tr><tr><td align="left" valign="top">Post 2007 data</td><td align="char" char="." valign="top">0.787</td><td align="char" char="." valign="top">0.775</td><td align="char" char="." valign="top">0.783</td><td align="char" char="." 
valign="top">0.702</td><td align="char" char="." valign="top">0.729</td></tr><tr><td align="left" valign="top">1 Year outcome analysis on post 2007 data</td><td align="char" char="." valign="top">0.801</td><td align="char" char="." valign="top">0.788</td><td align="char" char="." valign="top">0.807</td><td align="char" char="." valign="top">0.741</td><td align="char" char="." valign="top">0.705</td></tr><tr><td align="left" valign="top">3 Year outcome analysis on post 2007 data</td><td align="char" char="." valign="top">0.792</td><td align="char" char="." valign="top">0.771</td><td align="char" char="." valign="top">0.82</td><td align="char" char="." valign="top">0.730</td><td align="char" char="." valign="top">0.721</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>EFS: event-free survival.</p></fn><fn id="table2fn2"><p><sup>b</sup>OS: overall survival.</p></fn><fn id="table2fn3"><p><sup>c</sup>GF: graft failure.</p></fn><fn id="table2fn4"><p><sup>d</sup>AGVHD: acute graft-versus-host disease.</p></fn><fn id="table2fn5"><p><sup>e</sup>CGVHD: chronic graft-versus-host disease.</p></fn></table-wrap-foot></table-wrap><p>The accuracy of risk estimates, relating to the agreement between the estimated and observed number of events, is called &#x201C;calibration&#x201D; [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>]. Calibration is crucial in predictive algorithms because it ensures the accuracy of risk estimates, which directly affects clinical decision-making and patient expectations. Poor calibration can lead to systematic overestimation or underestimation of risk, resulting in false expectations and potentially harmful decisions [<xref ref-type="bibr" rid="ref36">36</xref>]. We performed causal isotonic calibration, a novel nonparametric method for calibrating predictors of heterogeneous treatment effect [<xref ref-type="bibr" rid="ref37">37</xref>]. 
We performed a 5-fold internal cross-validation on the training set to determine the optimal calibration. The resultant calibration model was then applied to the predictions during evaluation. To adjust for the bias caused by undersampling, we recalibrated the probabilities according to the method by Pozzolo et al [<xref ref-type="bibr" rid="ref38">38</xref>]. We evaluated the calibration curve, slope, and intercept across different outcomes of interest in the post-2007 data (<xref ref-type="table" rid="table3">Table 3</xref>).</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Calibration analysis: slope and intercept across different outcomes of interest for post-2007 data.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" rowspan="2">Outcome of interest</td><td align="left" valign="bottom" colspan="2">Calibration property</td></tr><tr><td align="left" valign="bottom">Slope</td><td align="left" valign="bottom">Intercept</td></tr></thead><tbody><tr><td align="left" valign="top">EFS<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup></td><td align="char" char="." valign="top">0.93</td><td align="char" char="." valign="top">&#x2013;0.02</td></tr><tr><td align="left" valign="top">OS<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup></td><td align="char" char="." valign="top">0.75</td><td align="char" char="." valign="top">&#x2013;0.07</td></tr><tr><td align="left" valign="top">GF<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></td><td align="char" char="." valign="top">0.9</td><td align="char" char="." valign="top">&#x2013;0.02</td></tr><tr><td align="left" valign="top">AGVHD<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup></td><td align="char" char="." valign="top">0.96</td><td align="char" char="." 
valign="top">&#x2013;0.03</td></tr><tr><td align="left" valign="top">CGVHD<sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></td><td align="char" char="." valign="top">0.87</td><td align="char" char="." valign="top">&#x2013;0.01</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>EFS: event-free survival.</p></fn><fn id="table3fn2"><p><sup>b</sup>OS: overall survival.</p></fn><fn id="table3fn3"><p><sup>c</sup>GF: graft failure.</p></fn><fn id="table3fn4"><p><sup>d</sup>AGVHD: acute graft-versus-host disease.</p></fn><fn id="table3fn5"><p><sup>e</sup>CGVHD: chronic graft-versus-host disease.</p></fn></table-wrap-foot></table-wrap><p>To understand the contributions of each feature to the predictive model, we used the Shapley additive explanations (SHAP) scores (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendices 3</xref><xref ref-type="supplementary-material" rid="app4"/><xref ref-type="supplementary-material" rid="app5"/><xref ref-type="supplementary-material" rid="app6"/>-<xref ref-type="supplementary-material" rid="app7">7</xref>). SHAP scores are based on game theory&#x2019;s Shapley values, quantifying each feature&#x2019;s marginal contribution to individual predictions. They are calculated by measuring how each feature affects the model output when included or excluded from all possible feature combinations.</p></sec><sec id="s2-4"><title>The Problem of Imbalance</title><p>The outcomes data for HCT for SCD are imbalanced, with very few negative outcomes. This imbalance has the potential to lead to a prediction bias, where an uncorrected model default may be skewed toward predicting positive outcomes. To address the problem of imbalance, we constructed a training dataset taking each outcome as the variable of interest. We included randomly sampled positive outcomes, typically 1.5&#x2010;3 times the total instances of the variable of interest. 
To address the issue of class imbalance, we used a 2-step approach involving bootstrapping and consensus-based decision-making. Initially, we generated 20 bootstrapped datasets by undersampling the majority class to achieve a (2-3):1 ratio with the minority class. These datasets served as the training sets for our predictive models, ensuring a balanced representation of classes during model training. Once the models were trained, each was then tested on a consistent test dataset to obtain a series of predictions. These individual predictions were subsequently pooled across all models. A final prediction for each test instance was determined based on a consensus threshold. If the proportion of models that agreed on a particular class exceeded a predetermined threshold, that class was assigned as the outcome for the instance. We ran the test dataset and used an RF algorithm on a 2-times repeated 10-fold cross-validation to demonstrate our model&#x2019;s versatility and response to unknown data (<xref ref-type="fig" rid="figure1">Figure 1</xref>). We assigned a value of 1 for a negative outcome prediction and &#x2212;1 for a positive outcome prediction and found the average sum across the 20 trials for each element.</p><p>Throughout the paper, we are guided by the CREMLS (Consolidated Reporting of Machine Learning Studies) guidelines [<xref ref-type="bibr" rid="ref39">39</xref>], to ensure transparency and rigor in reporting. We have attached a completed author CREMLS checklist.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Data partitioning, imputation, cross-validation, and item optimization. (A) Dataset preprocessing, imputation, and feature selection. (B) Model finalization and test of predictive performance. 
BFE: backward feature elimination; FSS: forward sequential selection.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v4i1e64519_fig01.png"/></fig></sec><sec id="s2-5"><title>Ethical Considerations</title><p>The Institutional Review Board at Yale University determined on March 3, 2033, that this study did not constitute human participants research per IRB protocol number 2000032419.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Feature Selection and Optimization</title><p>Of the 160 variables in the dataset, we selected 31 potentially clinically relevant predictive variables. Through FSS and BFE processes, we selected a final set of 17 predictive variables grouped into 3 categories. Patient Data variables included age at transplant, age group at transplant, sex, Karnofsky or Lansky score, HCT-comorbidity index, and recipient CMV serostatus. SCD variables included Number of ACS syndromes within 2 years pre-HCT, Required exchange transfusion, Vaso-occlusive crisis needing hospitalization in 2 years pre-HCT, and Hospitalization frequency for vaso-occlusive crises. Transplant Data variables included Donor type, Graft type, Conditioning intensity, Conditioning regimen, Serotherapy (ATG or Alemtuzumab), GVHD prophylaxis, and Donor-recipient HLA matching.</p></sec><sec id="s3-2"><title>Evaluation Outcomes and Model Performance</title><p>Discrimination in predictive performance was evaluated using accuracy (the percentage of correct predictions out of all predictions), balanced accuracy (the average of sensitivity and specificity, so that each class&#x2019;s importance is equal), and AUC (a measure of the model&#x2019;s true positive rate against its false positive rate, which indicates the ability to differentiate classes). AUC is the metric used in published literature. 
The RF model achieved the highest predictive AUC (<xref ref-type="table" rid="table1">Table 1</xref>) across multiple clinical outcomes. We measured our model&#x2019;s performance using the benchmark established in the literature. To determine the statistical validity, we implemented the method proposed by Bouckaert and Frank [<xref ref-type="bibr" rid="ref40">40</xref>]. We first performed 2&#x00D7;10 repeated cross-validation and obtained the performance of each of the models for each fold. We then applied Nadeau and Bengio&#x2019;s [<xref ref-type="bibr" rid="ref41">41</xref>] correction, which accounts for training set overlap in the variance estimation, to check whether the mean AUC of the RF is greater than the mean AUC of the other models across folds. The differences in the mean AUC were statistically significant for EFS, OS, GF, AGVHD, and CGVHD (<italic>P</italic>&#x003C;.05). We performed hyperparameter tuning using grid search cross-validation for the RF model. The ideal hyperparameters are described in <xref ref-type="table" rid="table4">Table 4</xref>. AUC, accuracy, and balanced accuracy equaled or exceeded the benchmarks of the ML predictive tools in the published literature [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref42">42</xref>]. The RF model has been previously reported to have the best AUC in predicting survival following HCT [<xref ref-type="bibr" rid="ref24">24</xref>]. Brazauskas et al [<xref ref-type="bibr" rid="ref16">16</xref>] published a benchmark AUC of 0.72 for EFS, and Taheriyan et al [<xref ref-type="bibr" rid="ref42">42</xref>] reported a benchmark AUC of 0.82 for AGVHD post-HCT. 
Accuracy and balanced accuracy were excellent for EFS (0.76, 0.69), OS (0.82, 0.68), GF (0.8, 0.71), vaso-occlusive pain post-HCT (0.9, 0.78), stroke post-HCT (0.92, 0.65), acute GVHD (0.71, 0.60), and CGVHD (0.72, 0.63).</p><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Final tuned hyperparameters of the random forest model for each outcome.</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" rowspan="2">Outcome of interest</td><td align="left" valign="bottom" colspan="6">Random forest hyperparameters</td></tr><tr><td align="left" valign="bottom">Max depth</td><td align="left" valign="bottom">Min samples split</td><td align="left" valign="bottom">Min samples leaf</td><td align="left" valign="bottom">Criterion</td><td align="left" valign="bottom">CCP_alpha<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="bottom">Max features</td></tr></thead><tbody><tr><td align="left" valign="top">EFS<sup><xref ref-type="table-fn" rid="table4fn2">b</xref></sup></td><td align="char" char="." valign="top">20</td><td align="char" char="." valign="top">10</td><td align="char" char="." valign="top">16</td><td align="left" valign="top">entropy</td><td align="char" char="." valign="top">0.0</td><td align="char" char="." valign="top">0.7</td></tr><tr><td align="left" valign="top">OS<sup><xref ref-type="table-fn" rid="table4fn3">c</xref></sup></td><td align="char" char="." valign="top">20</td><td align="char" char="." valign="top">10</td><td align="char" char="." valign="top">8</td><td align="left" valign="top">entropy</td><td align="char" char="." valign="top">0.01</td><td align="left" valign="top">0.125</td></tr><tr><td align="left" valign="top">GF<sup><xref ref-type="table-fn" rid="table4fn4">d</xref></sup></td><td align="char" char="." valign="top">20</td><td align="char" char="." valign="top">20</td><td align="char" char="." 
valign="top">16</td><td align="left" valign="top">entropy</td><td align="char" char="." valign="top">0.0</td><td align="char" char="." valign="top">0.5</td></tr><tr><td align="left" valign="top">AGVHD<sup><xref ref-type="table-fn" rid="table4fn5">e</xref></sup></td><td align="char" char="." valign="top">5</td><td align="char" char="." valign="top">20</td><td align="char" char="." valign="top">8</td><td align="left" valign="top">entropy</td><td align="char" char="." valign="top">0.0</td><td align="char" char="." valign="top">0.5</td></tr><tr><td align="left" valign="top">CGVHD<sup><xref ref-type="table-fn" rid="table4fn6">f</xref></sup></td><td align="char" char="." valign="top">15</td><td align="char" char="." valign="top">10</td><td align="char" char="." valign="top">8</td><td align="left" valign="top">entropy</td><td align="char" char="." valign="top">0.0</td><td align="char" char="." valign="top">0.5</td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>CCP_alpha: cost complexity pruning alpha.</p></fn><fn id="table4fn2"><p><sup>b</sup>EFS: event-free survival.</p></fn><fn id="table4fn3"><p><sup>c</sup>OS: overall survival.</p></fn><fn id="table4fn4"><p><sup>d</sup>GF: graft failure.</p></fn><fn id="table4fn5"><p><sup>e</sup>AGVHD: acute graft-versus-host disease.</p></fn><fn id="table4fn6"><p><sup>f</sup>CGVHD: chronic graft-versus-host disease.</p></fn></table-wrap-foot></table-wrap><p>SPRIGHT retained high AUC in subpopulations, including patients aged &#x2264;10, &#x2264;18, and &#x003E;18 years and those undergoing HCT after 2007, as well as in 1- and 3-year survival analyses (<xref ref-type="table" rid="table1">Table 1</xref>).</p><p>Calibration, the agreement between the estimated and observed number of events, was assessed for the major outcomes. A calibration slope of 1 and an intercept close to zero are associated with good calibration. 
SPRIGHT revealed good calibration with a slope range of 0.87&#x2010;0.96, with small negative intercepts (&#x2212;0.01 to 0.03), for 4 out of the 5 outcomes. However, OS exhibits nonideal calibration and may be reflective of the overall high OS in all subgroups (<xref ref-type="fig" rid="figure2">Figure 2</xref>).</p><p>Feature importance analysis using SHAP values revealed consistent patterns across all outcomes. Age at transplantation and donor type emerged as the most influential predictors, corroborating previous findings by Brazauskas et al [<xref ref-type="bibr" rid="ref16">16</xref>] and Eapen et al [<xref ref-type="bibr" rid="ref5">5</xref>]. Disease severity indicators, most importantly the frequency of acute chest syndrome episodes in the 2 years preceding HCT, were identified as another critical predictor. The frequency of VOC requiring hospitalization and the need for exchange transfusions also demonstrated substantial predictive importance. This suggests the utility of including pretransplant disease characteristics for predicting outcomes. The comprehensive SHAP analysis, including feature importance rankings and their relative contributions to model predictions, is presented in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendices 3</xref><xref ref-type="supplementary-material" rid="app4"/><xref ref-type="supplementary-material" rid="app5"/><xref ref-type="supplementary-material" rid="app6"/>-<xref ref-type="supplementary-material" rid="app7">7</xref>.</p><p>To demonstrate the clinical utility of our model, we analyzed predictions across 3 distinct hypothetical patient scenarios. Case-specific patient characteristics and their corresponding predicted outcomes are detailed in <xref ref-type="table" rid="table5">Table 5</xref> and Table S3 in <xref ref-type="supplementary-material" rid="app8">Multimedia Appendix 8</xref>, respectively. 
The model&#x2019;s predictions aligned with established clinical observations, showing less favorable outcomes in cases involving non-HLA identical donors and in older patients with more severe disease characteristics, which were consistent with previous studies [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref16">16</xref>].</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Calibration of SPRIGHT for various outcomes. Calibration curve, slope, and intercept for (A) event-free survival, (B) graft failure, (C) acute graft-versus-host disease (GVHD), and (D) chronic GVHD. Overall survival (OS) shows a nonideal calibration slope of 0.75 and intercept of &#x2212;0.07 (data not shown) and may be reflective of the overall high OS in all subgroups.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v4i1e64519_fig02.png"/></fig><table-wrap id="t5" position="float"><label>Table 5.</label><caption><p>Hypothetical patient profiles with varying age, donor type, and disease severity.</p></caption><table id="table5" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Data</td><td align="left" valign="bottom">Patient 1</td><td align="left" valign="bottom">Patient 2</td><td align="left" valign="bottom">Patient 3</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="4">Patient data</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Age at transplant (year)</td><td align="char" char="." valign="top">6</td><td align="char" char="." valign="top">6</td><td align="char" char="." valign="top">16</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Age group (year)</td><td align="char" char="." valign="top">&#x2264;10</td><td align="char" char="." valign="top">&#x2264;10</td><td align="char" char="." 
valign="top">11-17</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Sex</td><td align="left" valign="top">Male</td><td align="left" valign="top">Male</td><td align="left" valign="top">Male</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>KPS or Lansky<sup><xref ref-type="table-fn" rid="table5fn1">a</xref></sup> score</td><td align="char" char="." valign="top">&#x003C;90</td><td align="char" char="." valign="top">&#x003C;90</td><td align="char" char="." valign="top">&#x003C;90</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>HCT<sup><xref ref-type="table-fn" rid="table5fn2">b</xref></sup>-Comorbidity index (in range)</td><td align="char" char="." valign="top">0-2</td><td align="char" char="." valign="top">0-2</td><td align="char" char="." valign="top">0-2</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Recipient CMV<sup><xref ref-type="table-fn" rid="table5fn3">c</xref></sup> serostatus</td><td align="left" valign="top">Negative</td><td align="left" valign="top">Negative</td><td align="left" valign="top">Negative</td></tr><tr><td align="left" valign="top" colspan="4">Transplant data</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Donor</td><td align="left" valign="top">HLA<sup><xref ref-type="table-fn" rid="table5fn4">d</xref></sup> identical sibling</td><td align="left" valign="top">HLA mismatch relative</td><td align="left" valign="top">HLA identical sibling</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Graft type</td><td align="left" valign="top">Bone marrow</td><td align="left" 
valign="top">Bone marrow</td><td align="left" valign="top">Bone marrow</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Donor-recipient HLA matching</td><td align="char" char="." valign="top">8/8</td><td align="char" char="." valign="top">7/8</td><td align="char" char="." valign="top">8/8</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Conditioning intensity</td><td align="left" valign="top">Myeloablative</td><td align="left" valign="top">Nonmyeloablative</td><td align="left" valign="top">Myeloablative</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Conditioning regimen</td><td align="left" valign="top">Flu/Bu<sup><xref ref-type="table-fn" rid="table5fn5">e</xref></sup></td><td align="left" valign="top">TBI/Cy/Flu/TT<sup><xref ref-type="table-fn" rid="table5fn6">f</xref></sup></td><td align="left" valign="top">Flu/Bu</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Serotherapy</td><td align="left" valign="top">ATG<sup><xref ref-type="table-fn" rid="table5fn7">g</xref></sup></td><td align="left" valign="top">ATG</td><td align="left" valign="top">ATG</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>GVHD<sup><xref ref-type="table-fn" rid="table5fn8">h</xref></sup> prophylaxis</td><td align="left" valign="top">CNI+MTX<sup><xref ref-type="table-fn" rid="table5fn9">i</xref></sup></td><td align="left" valign="top">Post-Cy+Siro&#x00B1; MMF<sup><xref ref-type="table-fn" rid="table5fn10">j</xref></sup></td><td align="left" valign="top">CNI+MTX</td></tr><tr><td align="left" valign="top" colspan="4">SCD<sup><xref ref-type="table-fn" rid="table5fn11">k</xref></sup> data</td></tr><tr><td 
align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Number of ACS<sup><xref ref-type="table-fn" rid="table5fn12">l</xref></sup> syndromes within 2 years pre-HCT</td><td align="char" char="." valign="top">0</td><td align="char" char="." valign="top">0</td><td align="char" char="." valign="top">2</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Require exchange transfusion</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>VOC<sup><xref ref-type="table-fn" rid="table5fn13">m</xref></sup> requiring hospitalization within 2 years pre-HCT</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">Yes</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Frequency of hospitalizations for VOC</td><td align="char" char="." valign="top">&#x003C;3 per year</td><td align="char" char="." valign="top">&#x003C;3 per year</td><td align="char" char="." 
valign="top">&#x003C;3 per year</td></tr></tbody></table><table-wrap-foot><fn id="table5fn1"><p><sup>a</sup>KPS: Karnofsky performance status.</p></fn><fn id="table5fn2"><p><sup>b</sup>HCT: hematopoietic cell transplantation.</p></fn><fn id="table5fn3"><p><sup>c</sup>CMV: cytomegalovirus.</p></fn><fn id="table5fn4"><p><sup>d</sup>HLA: human leukocyte antigen.</p></fn><fn id="table5fn5"><p><sup>e</sup>Flu/Bu: fludarabine + busulfan.</p></fn><fn id="table5fn6"><p><sup>f</sup>TBI/Cy/Flu/TT: total body irradiation + cyclophosphamide + fludarabine + thiotepa.</p></fn><fn id="table5fn7"><p><sup>g</sup>ATG: anti-thymocyte globulin.</p></fn><fn id="table5fn8"><p><sup>h</sup>GVHD: graft-versus-host disease.</p></fn><fn id="table5fn9"><p><sup>i</sup>CNI+MTX: calcineurin inhibitor + methotrexate.</p></fn><fn id="table5fn10"><p><sup>j</sup>Post-Cy+Siro&#x00B1; MMF: post-transplant cyclophosphamide + sirolimus &#x00B1; mycophenolate mofetil.</p></fn><fn id="table5fn11"><p><sup>k</sup>SCD: sickle cell disease.</p></fn><fn id="table5fn12"><p><sup>l</sup>ACS: acute chest syndrome.</p></fn><fn id="table5fn13"><p><sup>m</sup>VOC: vaso-occlusive crisis.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-3"><title>SPRIGHT User Interface</title><p>Age is the only numeric feature that is entered manually. The rest of the inputs are categorical and are entered by selecting an option from a drop-down menu. Patient-specific data, disease-specific data, and treatment data (donor, one of the major published treatment regimens or a customized conditioning regimen, conditioning intensity, GVHD prophylaxis, and serotherapy with ATG or Alemtuzumab) can be entered. The tabular output describes predicted OS, EFS, GF, Death, AGVHD, CGVHD, VOC, and stroke. The predicted outcomes are also pictorially represented in pie charts. 
One unique feature is that the user has the option of selecting a published HCT regimen or selecting a custom regimen by combining conditioning, GVHD prophylaxis, and serotherapy (<xref ref-type="fig" rid="figure3">Figure 3</xref>; Table S4 in <xref ref-type="supplementary-material" rid="app9">Multimedia Appendix 9</xref>). This feature decreases keystrokes, improves ease of use of the app, and facilitates a comparative analysis across donor types and treatment regimens. SPRIGHT can be accessed on any smartphone, tablet, or personal computer using a shortened URL or a QR code. The individualized outcomes are also represented as pie charts displaying individualized estimates (<xref ref-type="fig" rid="figure3">Figure 3</xref>). The pie charts can be downloaded and shared with the patient or added to the electronic medical record as an image.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Clinician user interface of Sickle Cell Predicting Outcomes of Hematopoietic Cell Transplantation (SPRIGHT).</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v4i1e64519_fig03.png"/></fig></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>We describe the development and internal validation of SPRIGHT, which, to the best of our knowledge, is the first ML individualized prediction tool for HCT for SCD. Eapen et al [<xref ref-type="bibr" rid="ref5">5</xref>] identified age, donor type, and conditioning regimen intensity as critical predictive factors of outcomes of HCT for SCD. Gluckman et al [<xref ref-type="bibr" rid="ref6">6</xref>] identified age and year of transplant as critical factors. Younger patients were shown to have higher EFS. Cappelli et al [<xref ref-type="bibr" rid="ref43">43</xref>] reported better OS and EFS and a lower incidence of AGVHD and CGVHD in younger patients. 
Together, these registry-based studies generated important population-level predictive factors of HCT for SCD. They do not, however, provide a means to combine patient, transplant, and disease characteristics into a personalized predictive model for outcomes of HCT. The SPRIGHT prediction model incorporates multiple relevant pre-HCT predictive factors for the individualized prediction of key clinically relevant post-HCT outcomes. The RF algorithm outperforms the Brazauskas model, other ML algorithms, and logistic regression in predictive performance. The RF-based SPRIGHT prediction model has high predictive discrimination and calibration performance. The excellent discriminative predictive performance is demonstrated by the high value of AUC across all outcomes across all eras, age groups, and follow-up periods of 1 or 3 years. Going beyond the commonly reported predictive discrimination with AUC, we also reported accuracy and balanced accuracy and calibration measures of calibration curve, slope, and intercept. An important innovation of SPRIGHT is the option for the end user to select a published regimen that combines chemotherapy or radiation conditioning, serotherapy, and GVHD prophylaxis. This innovation simplifies the decision-making process for clinicians and allows them to compare potential outcomes across different regimens and donor types. Thus, SPRIGHT helps physicians and patients in discerning the nuances in efficacy and safety of HCT for the individual and has the potential to inform and guide shared decision-making. To mitigate overfitting and validate model performance, we used 10-fold cross-validation, ensuring robustness by mimicking multiple tests on independent datasets. This method approximates the effectiveness of external validation by exposing the model to various training and validation splits, thus predicting its behavior on unseen data. 
We addressed potential feature collinearity using recursive feature elimination for feature selection and RFs, which inherently mitigate correlation effects through random feature subsampling at each tree.</p></sec><sec id="s4-2"><title>Limitations</title><p>There are several limitations to this study. The SPRIGHT predictive tool is based on the HCT for SCD dataset derived from data reported to CIBMTR. The use of the CIBMTR dataset allows us to access the entire US experience reported, but it is also subject to limitations of registry studies, including bias, loss to follow-up, and a lack of generalizability across era, center volume, and expertise. While SPRIGHT uses multiple predictive variables, it is limited to those variables collected by CIBMTR. In 2019, Bolanos-Meade et al [<xref ref-type="bibr" rid="ref44">44</xref>] reported an improved EFS following mismatched relative donor HCT as compared to their previous report in 2009 following the escalation of the dose of total body irradiation from 200 to 400 cGy [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref45">45</xref>]. However, CIBMTR form 2400 and consequently the HCT for SCD dataset do not distinguish between patients who received the 2 dose levels of total body irradiation. Further, since pain crisis post-HCT is included as a discrete variable in the HCT for SCD dataset, it is not possible to discern its timing, frequency, or severity. In the CIBMTR dataset, an analysis of predictor completeness reveals that 10/13 TED variables chosen have a completion rate exceeding 98%. However, only approximately 46.5% (763/1641) of patients are on the CRF track, with comprehensive disease-specific data available, a category under which 4 of our predictor variables fall. 
While the missing data could be a source of bias, the missingness of data was only a function of whether the institution was designated as a TED-only or CRF and whether the CIBMTR algorithm assigned an individual patient to the CRF track. Thus, the missing data may be missing at random. The model we used for imputation, MissForest, has been shown to outperform all other algorithms in all metrics [<xref ref-type="bibr" rid="ref46">46</xref>,<xref ref-type="bibr" rid="ref47">47</xref>]. However, these imputation algorithms can produce severely biased regression coefficients and require a careful critique of the missing data mechanism and the interrelationships between the variables in the data [<xref ref-type="bibr" rid="ref48">48</xref>]. Overall, we acknowledge the concerns regarding the lack of details of SCD-related complications and the completeness of reporting of SCD-related clinical outcomes in the CIBMTR dataset. We, however, also recognize that the CIBMTR registry, with federally mandated data submission, contains the most complete data available. We support ongoing efforts to refine the data collection measures and training of data collection staff. One of the limitations of our model is the rarity of death events in the dataset, which limits the precision of calibration for OS. As a result, the model tends to slightly overestimate OS risk, particularly in subgroups with fewer events. This calibration limitation should be considered when interpreting OS predictions. Across all outcomes, despite mitigatory efforts to combat the inherent bias due to class imbalance, the bias may not be fully eradicated.</p><p>The tool incorporates a set of treatment regimens that have been carefully selected from peer-reviewed studies, ensuring that they are backed by sufficient clinical data. These regimens provide reliable and evidence-based predictions. 
However, custom combinations that are entered by users may fall outside of the dataset&#x2019;s training and may not be fully supported by the underlying data. In such cases, it is essential to rely on clinical expertise and user discretion when interpreting the predictions.</p><p>We recognize a limitation in the registry data due to inherent selection bias, with limited insight into the clinical reasoning behind regimen choices. While the data detail the regimens administered, the underlying clinical judgment is often not recorded. This limitation is particularly notable in HCT, where the absence of universally established standards of care presents additional challenges. Thus, the model and its results should be interpreted within this context.</p></sec><sec id="s4-3"><title>Comparison With Prior Work</title><p>Brazauskas et al [<xref ref-type="bibr" rid="ref16">16</xref>] developed a risk score using age and donor type as discrete variables. They reported that patients aged &#x2264;12 years with an HLA-matched sibling donor were at the lowest risk. Patients aged &#x2265;13 years with an HLA-matched sibling donor or aged &#x2264;12 years with an HLA-matched unrelated donor were at intermediate risk. All other groups were at high risk. This simple risk score has good predictive performance but has certain gaps that limit its utility in the individualized prediction of outcomes. Gluckman et al [<xref ref-type="bibr" rid="ref6">6</xref>] used age as a continuous variable and observed that for every 1-year increment in age, there was a 9% increase in the HR for treatment failure (graft failure or death) and a 10% increase in the HR for death. Thus, the Brazauskas model does not include the potential predictive value of increasing age from 5 to 13 years. 
Further, Brazauskas et al [<xref ref-type="bibr" rid="ref16">16</xref>] do not incorporate other patient, disease, and conditioning regimen characteristics in the prediction model because they considered these factors to be dynamic and subject to change. The Brazauskas model limits the predicted post-HCT outcomes to death, OS, and EFS and does not include other outcomes that are important to physicians and patients in shared decision-making, such as AGVHD, CGVHD, recurrent pain crisis, or stroke after HCT [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref49">49</xref>-<xref ref-type="bibr" rid="ref51">51</xref>]. SPRIGHT predicts these outcomes with high predictive performance. Other published ML predictive models for HCT for other diseases are of limited clinical relevance in decision-making since they limit themselves to predicting single outcomes, such as death, overall survival, disease relapse, GVHD, busulfan exposure, kidney injury, or reactivation of Epstein-Barr virus [<xref ref-type="bibr" rid="ref17">17</xref>-<xref ref-type="bibr" rid="ref30">30</xref>].</p><p>In developing SPRIGHT, we addressed the gaps in the knowledge in individualized prediction of outcomes of HCT. We included 7 clinically relevant outcomes, including rates of OS, EFS, GF, death, AGVHD, CGVHD, and VOC. Each of these 7 clinical outcomes initially required a distinct set of 10&#x2010;11 pre-HCT predictive features for optimal performance, leading to incomplete overlap and potential model fragmentation. To address this, we adopted a unified approach by selecting a comprehensive set of 17 pre-HCT patient, HCT, and disease characteristics. By applying robust feature selection techniques to optimize predictive performance and improve the model&#x2019;s overall clinical applicability, we demonstrated the predictive value of these features. Further, patient and disease features inform patient selection and HCT features inform regimen selection. 
Thus, these additional pre-HCT features are important, clinically relevant considerations in decision-making.</p></sec><sec id="s4-4"><title>Future Directions</title><p>We present here an initial in-lab validation of the predictive model. We recognize that the use of a US-based dataset may limit the generalizability of our findings to other regions or health care systems due to potential differences in population characteristics and treatment protocols. For further external validation across different geographic locations to establish the model&#x2019;s generalizability and clinical utility, we propose to use the European Bone Marrow Transplantation Registry (EBMT) dataset, a completely independent dataset, in collaboration with European investigators. Of note, Gluckman et al [<xref ref-type="bibr" rid="ref6">6</xref>] have previously combined CIBMTR and EBMT registry data on SCD for analysis and included similar numbers of children, adults, and donor types from the 2 registries and do not report differences in predictive factors or outcomes in the 2 registries. We have demonstrated that the predictive performance remains equivalent whether we use the entire dataset or the more recent data after 2007, which represents 84% (1378/1641) of participants in the dataset. Acknowledging the evolving nature of supportive care practices in HCT, we propose further temporal validation with future years of data being added to this dataset. 
We also propose to continue to enhance SPRIGHT by incorporating expert opinion, adapting to patient health literacy, values, and preferences, and using patient-friendly data visualization to support shared decision-making [<xref ref-type="bibr" rid="ref52">52</xref>-<xref ref-type="bibr" rid="ref54">54</xref>].</p></sec><sec id="s4-5"><title>Conclusions</title><p>In conclusion, the SPRIGHT prediction model integrates individual-specific patient and disease characteristics, conditioning regimens, GVHD prophylaxis, and donor characteristics and predicts key clinical outcomes. It exhibits superior predictive performance across multiple measures of discrimination and calibration as compared to logistic regression and other ensemble ML methods.</p></sec></sec></body><back><notes><sec><title>Data Availability</title><p>The National Institutes of Health (NIH) Hematopoietic Cell Transplantation for Sickle Cell Disease dataset used to develop the clinical decision tool was obtained from the NHLBI Biologic Specimen and Data Repository Information Coordinating Center (BIOLINCC) [<xref ref-type="bibr" rid="ref31">31</xref>]. 
For additional questions on the algorithms used for the decision tool, investigators may contact the corresponding author by email.</p></sec></notes><fn-group><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AGVHD</term><def><p>acute graft-versus-host disease</p></def></def-item><def-item><term id="abb2">AUC</term><def><p>area under the curve</p></def></def-item><def-item><term id="abb3">BFE</term><def><p>backward feature elimination</p></def></def-item><def-item><term id="abb4">BIOLINCC</term><def><p>Biologic Specimen and Data Repository Information Coordinating Center</p></def></def-item><def-item><term id="abb5">CGVHD</term><def><p>chronic graft-versus-host disease</p></def></def-item><def-item><term id="abb6">CIBMTR</term><def><p>Center for International Bone Marrow Transplant Research</p></def></def-item><def-item><term id="abb7">CREMLS</term><def><p>Consolidated Reporting of Machine Learning Studies</p></def></def-item><def-item><term id="abb8">CRF</term><def><p>Comprehensive Report Form</p></def></def-item><def-item><term id="abb9">EBMT</term><def><p>European Bone Marrow Transplantation Registry</p></def></def-item><def-item><term id="abb10">EFS</term><def><p>event-free survival</p></def></def-item><def-item><term id="abb11">FSS</term><def><p>forward sequential selection</p></def></def-item><def-item><term id="abb12">GF</term><def><p>graft failure</p></def></def-item><def-item><term id="abb13">GVHD</term><def><p>graft-versus-host disease</p></def></def-item><def-item><term id="abb14">HCT</term><def><p>hematopoietic cell transplantation</p></def></def-item><def-item><term id="abb15">HR</term><def><p>hazard ratio</p></def></def-item><def-item><term id="abb16">ML</term><def><p>machine learning</p></def></def-item><def-item><term id="abb17">NHLBI</term><def><p>National Heart, Lung, and Blood Institute</p></def></def-item><def-item><term id="abb18">OS</term><def><p>overall 
survival</p></def></def-item><def-item><term id="abb19">SCD</term><def><p>sickle cell disease</p></def></def-item><def-item><term id="abb20">SHAP</term><def><p>Shapley additive explanations</p></def></def-item><def-item><term id="abb21">SPRIGHT</term><def><p>Sickle Cell Predicting Outcomes of Hematopoietic Cell Transplantation</p></def></def-item><def-item><term id="abb22">VOC</term><def><p>vaso-occlusive crisis</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kato</surname><given-names>GJ</given-names> </name><name name-style="western"><surname>Piel</surname><given-names>FB</given-names> </name><name name-style="western"><surname>Reid</surname><given-names>CD</given-names> </name><etal/></person-group><article-title>Sickle cell disease</article-title><source>Nat Rev Dis Primers</source><year>2018</year><month>03</month><day>15</day><volume>4</volume><issue>1</issue><fpage>18010</fpage><pub-id pub-id-type="doi">10.1038/nrdp.2018.10</pub-id><pub-id pub-id-type="medline">29542687</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Walters</surname><given-names>MC</given-names> </name><name name-style="western"><surname>De Castro</surname><given-names>LM</given-names> </name><name name-style="western"><surname>Sullivan</surname><given-names>KM</given-names> </name><etal/></person-group><article-title>Indications and results of HLA-identical sibling hematopoietic cell transplantation for sickle cell disease</article-title><source>Biol Blood Marrow Transplant</source><year>2016</year><month>02</month><volume>22</volume><issue>2</issue><fpage>207</fpage><lpage>211</lpage><pub-id pub-id-type="doi">10.1016/j.bbmt.2015.10.017</pub-id></nlm-citation></ref><ref 
id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Walters</surname><given-names>MC</given-names> </name><name name-style="western"><surname>Patience</surname><given-names>M</given-names> </name><name name-style="western"><surname>Leisenring</surname><given-names>W</given-names> </name><etal/></person-group><article-title>Bone marrow transplantation for sickle cell disease</article-title><source>N Engl J Med</source><year>1996</year><month>08</month><day>8</day><volume>335</volume><issue>6</issue><fpage>369</fpage><lpage>376</lpage><pub-id pub-id-type="doi">10.1056/NEJM199608083350601</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Walters</surname><given-names>MC</given-names> </name><name name-style="western"><surname>Patience</surname><given-names>M</given-names> </name><name name-style="western"><surname>Leisenring</surname><given-names>W</given-names> </name><etal/></person-group><article-title>Collaborative multicenter investigation of marrow transplantation for sickle cell disease: current results and future directions</article-title><source>Biol Blood Marrow Transplant</source><year>1997</year><month>12</month><volume>3</volume><issue>6</issue><fpage>310</fpage><lpage>315</lpage><pub-id pub-id-type="medline">9502298</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Eapen</surname><given-names>M</given-names> </name><name name-style="western"><surname>Brazauskas</surname><given-names>R</given-names> </name><name name-style="western"><surname>Walters</surname><given-names>MC</given-names> </name><etal/></person-group><article-title>Effect of donor type and conditioning regimen intensity on allogeneic transplantation 
outcomes in patients with sickle cell disease: a retrospective multicentre, cohort study</article-title><source>Lancet Haematol</source><year>2019</year><month>11</month><volume>6</volume><issue>11</issue><fpage>e585</fpage><lpage>e596</lpage><pub-id pub-id-type="doi">10.1016/S2352-3026(19)30154-1</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gluckman</surname><given-names>E</given-names> </name><name name-style="western"><surname>Cappelli</surname><given-names>B</given-names> </name><name name-style="western"><surname>Bernaudin</surname><given-names>F</given-names> </name><etal/></person-group><article-title>Sickle cell disease: an international survey of results of HLA-identical sibling hematopoietic stem cell transplantation</article-title><source>Blood</source><year>2017</year><month>03</month><day>16</day><volume>129</volume><issue>11</issue><fpage>1548</fpage><lpage>1556</lpage><pub-id pub-id-type="doi">10.1182/blood-2016-10-745711</pub-id><pub-id pub-id-type="medline">27965196</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hsieh</surname><given-names>MM</given-names> </name><name name-style="western"><surname>Fitzhugh</surname><given-names>CD</given-names> </name><name name-style="western"><surname>Weitzel</surname><given-names>RP</given-names> </name><etal/></person-group><article-title>Nonmyeloablative HLA-matched sibling allogeneic hematopoietic stem cell transplantation for severe sickle cell phenotype</article-title><source>JAMA</source><year>2014</year><month>07</month><day>2</day><volume>312</volume><issue>1</issue><fpage>48</fpage><lpage>56</lpage><pub-id pub-id-type="doi">10.1001/jama.2014.7192</pub-id><pub-id pub-id-type="medline">25058217</pub-id></nlm-citation></ref><ref 
id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Krishnamurti</surname><given-names>L</given-names> </name><name name-style="western"><surname>Neuberg</surname><given-names>DS</given-names> </name><name name-style="western"><surname>Sullivan</surname><given-names>KM</given-names> </name><etal/></person-group><article-title>Bone marrow transplantation for adolescents and young adults with sickle cell disease: results of a prospective multicenter pilot study</article-title><source>Am J Hematol</source><year>2019</year><month>04</month><volume>94</volume><issue>4</issue><fpage>446</fpage><lpage>454</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://onlinelibrary.wiley.com/toc/10968652/94/4">https://onlinelibrary.wiley.com/toc/10968652/94/4</ext-link></comment><pub-id pub-id-type="doi">10.1002/ajh.25401</pub-id><pub-id pub-id-type="medline">30637784</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shenoy</surname><given-names>S</given-names> </name><name name-style="western"><surname>Eapen</surname><given-names>M</given-names> </name><name name-style="western"><surname>Panepinto</surname><given-names>JA</given-names> </name><etal/></person-group><article-title>A trial of unrelated donor marrow transplantation for children with severe sickle cell disease</article-title><source>Blood</source><year>2016</year><month>11</month><day>24</day><volume>128</volume><issue>21</issue><fpage>2561</fpage><lpage>2567</lpage><pub-id pub-id-type="doi">10.1182/blood-2016-05-715870</pub-id><pub-id pub-id-type="medline">27625358</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>King</surname><given-names>AA</given-names> </name><name 
name-style="western"><surname>Kamani</surname><given-names>N</given-names> </name><name name-style="western"><surname>Bunin</surname><given-names>N</given-names> </name><etal/></person-group><article-title>Successful matched sibling donor marrow transplantation following reduced intensity conditioning in children with hemoglobinopathies</article-title><source>Am J Hematol</source><year>2015</year><month>12</month><volume>90</volume><issue>12</issue><fpage>1093</fpage><lpage>1098</lpage><pub-id pub-id-type="doi">10.1002/ajh.24183</pub-id><pub-id pub-id-type="medline">26348869</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gluckman</surname><given-names>E</given-names> </name><name name-style="western"><surname>Cappelli</surname><given-names>B</given-names> </name><name name-style="western"><surname>Scigliuolo</surname><given-names>GM</given-names> </name><name name-style="western"><surname>Fuente</surname><given-names>J de la</given-names> </name><name name-style="western"><surname>Corbacioglu</surname><given-names>S</given-names> </name></person-group><article-title>Alternative donor hematopoietic stem cell transplantation for sickle cell disease in Europe</article-title><source>Hematol Oncol Stem Cell Ther</source><year>2020</year><volume>13</volume><issue>4</issue><fpage>181</fpage><lpage>188</lpage><pub-id pub-id-type="doi">10.1016/j.hemonc.2019.12.011</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Patel</surname><given-names>DA</given-names> </name><name name-style="western"><surname>Akinsete</surname><given-names>AM</given-names> </name><name name-style="western"><surname>Fuente</surname><given-names>J de la</given-names> </name><name 
name-style="western"><surname>Kassim</surname><given-names>AA</given-names> </name></person-group><article-title>Haploidentical bone marrow transplant with posttransplant cyclophosphamide for sickle cell disease: an update</article-title><source>Hematol Oncol Stem Cell Ther</source><year>2020</year><volume>13</volume><issue>2</issue><fpage>91</fpage><lpage>97</lpage><pub-id pub-id-type="doi">10.1016/j.hemonc.2020.01.002</pub-id><pub-id pub-id-type="medline">32202252</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sinha</surname><given-names>CB</given-names> </name><name name-style="western"><surname>Bakshi</surname><given-names>N</given-names> </name><name name-style="western"><surname>Ross</surname><given-names>D</given-names> </name><name name-style="western"><surname>Loewenstein</surname><given-names>G</given-names> </name><name name-style="western"><surname>Krishnamurti</surname><given-names>L</given-names> </name></person-group><article-title>Primary caregiver decision-making in hematopoietic cell transplantation and gene therapy for sickle cell disease</article-title><source>Pediatr Blood Cancer</source><year>2021</year><month>01</month><volume>68</volume><issue>1</issue><fpage>e28749</fpage><pub-id pub-id-type="doi">10.1002/pbc.28749</pub-id><pub-id pub-id-type="medline">33034129</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bakshi</surname><given-names>N</given-names> </name><name name-style="western"><surname>Katoch</surname><given-names>D</given-names> </name><name name-style="western"><surname>Sinha</surname><given-names>CB</given-names> </name><etal/></person-group><article-title>Assessment of patient and caregiver attitudes and approaches to decision-making regarding bone marrow transplant for 
sickle cell disease: a qualitative study</article-title><source>JAMA Netw Open</source><year>2020</year><month>05</month><day>1</day><volume>3</volume><issue>5</issue><fpage>e206742</fpage><pub-id pub-id-type="doi">10.1001/jamanetworkopen.2020.6742</pub-id><pub-id pub-id-type="medline">32469414</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Veludhandi</surname><given-names>A</given-names> </name><name name-style="western"><surname>Ross</surname><given-names>D</given-names> </name><name name-style="western"><surname>Sinha</surname><given-names>CB</given-names> </name><name name-style="western"><surname>McCracken</surname><given-names>C</given-names> </name><name name-style="western"><surname>Bakshi</surname><given-names>N</given-names> </name><name name-style="western"><surname>Krishnamurti</surname><given-names>L</given-names> </name></person-group><article-title>A decision support tool for allogeneic hematopoietic stem cell transplantation for children with sickle cell disease: acceptability and usability study</article-title><source>JMIR Form Res</source><year>2021</year><month>10</month><day>28</day><volume>5</volume><issue>10</issue><fpage>e30093</fpage><pub-id pub-id-type="doi">10.2196/30093</pub-id><pub-id pub-id-type="medline">34709190</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Brazauskas</surname><given-names>R</given-names> </name><name name-style="western"><surname>Scigliuolo</surname><given-names>GM</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>HL</given-names> </name><etal/></person-group><article-title>Risk score to predict event-free survival after hematopoietic cell transplant for sickle cell 
disease</article-title><source>Blood</source><year>2020</year><month>07</month><day>30</day><volume>136</volume><issue>5</issue><fpage>623</fpage><lpage>626</lpage><pub-id pub-id-type="doi">10.1182/blood.2020005687</pub-id><pub-id pub-id-type="medline">32518950</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Afanaseva</surname><given-names>KS</given-names> </name><name name-style="western"><surname>Bakin</surname><given-names>EA</given-names> </name><name name-style="western"><surname>Smirnova</surname><given-names>AG</given-names> </name><etal/></person-group><article-title>A pilot study of implication of machine learning for relapse prediction after allogeneic stem cell transplantation in adults with Ph-positive acute lymphoblastic leukemia</article-title><source>Sci Rep</source><year>2023</year><month>10</month><day>5</day><volume>13</volume><issue>1</issue><fpage>16790</fpage><pub-id pub-id-type="doi">10.1038/s41598-023-43950-w</pub-id><pub-id pub-id-type="medline">37798335</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Al-Riyami</surname><given-names>AZ</given-names> </name><name name-style="western"><surname>Maryamchik</surname><given-names>E</given-names> </name><name name-style="western"><surname>Hanna</surname><given-names>RS</given-names> </name><etal/></person-group><article-title>A machine-learning model that incorporates CD45 surface expression predicts hematopoietic progenitor cell recovery after freeze-thaw</article-title><source>Cytotherapy</source><year>2023</year><month>10</month><volume>25</volume><issue>10</issue><fpage>1048</fpage><lpage>1056</lpage><pub-id pub-id-type="doi">10.1016/j.jcyt.2023.05.007</pub-id><pub-id pub-id-type="medline">37318396</pub-id></nlm-citation></ref><ref 
id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fan</surname><given-names>S</given-names> </name><name name-style="western"><surname>Hong</surname><given-names>HY</given-names> </name><name name-style="western"><surname>Dong</surname><given-names>XY</given-names> </name><etal/></person-group><article-title>Machine learning algorithm as a prognostic tool for Epstein-Barr virus reactivation after haploidentical hematopoietic stem cell transplantation</article-title><source>Blood Sci</source><year>2023</year><month>01</month><volume>5</volume><issue>1</issue><fpage>51</fpage><lpage>59</lpage><pub-id pub-id-type="doi">10.1097/BS9.0000000000000143</pub-id><pub-id pub-id-type="medline">36742189</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Keret</surname><given-names>S</given-names> </name><name name-style="western"><surname>Rimar</surname><given-names>D</given-names> </name><name name-style="western"><surname>Lansiaux</surname><given-names>P</given-names> </name><etal/></person-group><article-title>Differentially expressed genes in systemic sclerosis: towards predictive medicine with new molecular tools for clinicians</article-title><source>Autoimmun Rev</source><year>2023</year><month>06</month><volume>22</volume><issue>6</issue><fpage>103314</fpage><pub-id pub-id-type="doi">10.1016/j.autrev.2023.103314</pub-id><pub-id pub-id-type="medline">36918090</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>D</given-names> </name><name name-style="western"><surname>Zhao</surname><given-names>J</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>B</given-names> 
</name><etal/></person-group><article-title>Predicting busulfan exposure in patients undergoing hematopoietic stem cell transplantation using machine learning techniques</article-title><source>Expert Rev Clin Pharmacol</source><year>2023</year><volume>16</volume><issue>8</issue><fpage>751</fpage><lpage>761</lpage><pub-id pub-id-type="doi">10.1080/17512433.2023.2226866</pub-id><pub-id pub-id-type="medline">37326641</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mushtaq</surname><given-names>AH</given-names> </name><name name-style="western"><surname>Shafqat</surname><given-names>A</given-names> </name><name name-style="western"><surname>Salah</surname><given-names>HT</given-names> </name><name name-style="western"><surname>Hashmi</surname><given-names>SK</given-names> </name><name name-style="western"><surname>Muhsen</surname><given-names>IN</given-names> </name></person-group><article-title>Machine learning applications and challenges in graft-versus-host disease: a scoping review</article-title><source>Curr Opin Oncol</source><year>2023</year><month>11</month><day>1</day><volume>35</volume><issue>6</issue><fpage>594</fpage><lpage>600</lpage><pub-id pub-id-type="doi">10.1097/CCO.0000000000000996</pub-id><pub-id pub-id-type="medline">37820094</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shourabizadeh</surname><given-names>H</given-names> </name><name name-style="western"><surname>Aleman</surname><given-names>DM</given-names> </name><name name-style="western"><surname>Rousseau</surname><given-names>LM</given-names> </name><name name-style="western"><surname>Law</surname><given-names>AD</given-names> </name><name name-style="western"><surname>Viswabandya</surname><given-names>A</given-names> </name><name 
name-style="western"><surname>Michelis</surname><given-names>FV</given-names> </name></person-group><article-title>Machine learning for the prediction of survival post-allogeneic hematopoietic cell transplantation: a single-center experience</article-title><source>Acta Haematol</source><year>2024</year><month>05</month><day>29</day><volume>147</volume><issue>3</issue><fpage>280</fpage><lpage>291</lpage><pub-id pub-id-type="doi">10.1159/000533665</pub-id><pub-id pub-id-type="medline">37769635</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sobrino</surname><given-names>S</given-names> </name><name name-style="western"><surname>Magnani</surname><given-names>A</given-names> </name><name name-style="western"><surname>Semeraro</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Severe hematopoietic stem cell inflammation compromises chronic granulomatous disease gene therapy</article-title><source>Cell Rep Med</source><year>2023</year><month>02</month><day>21</day><volume>4</volume><issue>2</issue><fpage>100919</fpage><pub-id pub-id-type="doi">10.1016/j.xcrm.2023.100919</pub-id><pub-id pub-id-type="medline">36706754</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sorror</surname><given-names>ML</given-names> </name></person-group><article-title>The use of prognostic models in allogeneic transplants: a perspective guide for clinicians and investigators</article-title><source>Blood</source><year>2023</year><month>05</month><day>4</day><volume>141</volume><issue>18</issue><fpage>2173</fpage><lpage>2186</lpage><pub-id pub-id-type="doi">10.1182/blood.2022017999</pub-id><pub-id pub-id-type="medline">36800564</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sparapani</surname><given-names>RA</given-names> </name><name name-style="western"><surname>Logan</surname><given-names>BR</given-names> </name><name name-style="western"><surname>Maiers</surname><given-names>MJ</given-names> </name><name name-style="western"><surname>Laud</surname><given-names>PW</given-names> </name><name name-style="western"><surname>McCulloch</surname><given-names>RE</given-names> </name></person-group><article-title>Nonparametric failure time: Time-to-event machine learning with heteroskedastic Bayesian additive regression trees and low information omnibus Dirichlet process mixtures</article-title><source>Biometrics</source><year>2023</year><month>12</month><volume>79</volume><issue>4</issue><fpage>3023</fpage><lpage>3037</lpage><pub-id pub-id-type="doi">10.1111/biom.13857</pub-id><pub-id pub-id-type="medline">36932826</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>von Asmuth</surname><given-names>EGJ</given-names> </name><name name-style="western"><surname>Neven</surname><given-names>B</given-names> </name><name name-style="western"><surname>Albert</surname><given-names>MH</given-names> </name><etal/></person-group><article-title>Predicting Patient Death after Allogeneic Stem Cell Transplantation for Inborn Errors Using Machine Learning (PREPAD): A European Society for Blood and Marrow Transplantation Inborn Errors Working Party Study</article-title><source>Transplant Cell Ther</source><year>2023</year><month>12</month><volume>29</volume><issue>12</issue><fpage>775</fpage><pub-id pub-id-type="doi">10.1016/j.jtct.2023.09.007</pub-id><pub-id pub-id-type="medline">37709203</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Wang</surname><given-names>P</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>C</given-names> </name><name name-style="western"><surname>Wei</surname><given-names>Z</given-names> </name><etal/></person-group><article-title>Nomogram for predicting early mortality after umbilical cord blood transplantation in children with inborn errors of immunity</article-title><source>J Clin Immunol</source><year>2023</year><month>08</month><volume>43</volume><issue>6</issue><fpage>1379</fpage><lpage>1392</lpage><pub-id pub-id-type="doi">10.1007/s10875-023-01505-8</pub-id><pub-id pub-id-type="medline">37155023</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhou</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Smith</surname><given-names>J</given-names> </name><name name-style="western"><surname>Keerthi</surname><given-names>D</given-names> </name><etal/></person-group><article-title>Longitudinal clinical data improve survival prediction after hematopoietic cell transplantation using machine learning</article-title><source>Blood Adv</source><year>2024</year><month>02</month><day>13</day><volume>8</volume><issue>3</issue><fpage>686</fpage><lpage>698</lpage><pub-id pub-id-type="doi">10.1182/bloodadvances.2023011752</pub-id><pub-id pub-id-type="medline">37991991</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Krishnamurti</surname><given-names>L</given-names> </name><name name-style="western"><surname>Liang</surname><given-names>J</given-names> </name><name name-style="western"><surname>He</surname><given-names>Z</given-names> </name><etal/></person-group><article-title>Incidence and risk factors of pain crisis after hematopoietic cell 
transplantation for sickle cell disease</article-title><source>Blood Adv</source><year>2024</year><month>04</month><day>23</day><volume>8</volume><issue>8</issue><fpage>1908</fpage><lpage>1919</lpage><pub-id pub-id-type="doi">10.1182/bloodadvances.2023010749</pub-id><pub-id pub-id-type="medline">38324722</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Friedman</surname><given-names>D</given-names> </name><name name-style="western"><surname>Dozor</surname><given-names>AJ</given-names> </name><name name-style="western"><surname>Milner</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Stable to improved cardiac and pulmonary function in children with high-risk sickle cell disease following haploidentical stem cell transplantation</article-title><source>Bone Marrow Transplant</source><year>2021</year><month>09</month><volume>56</volume><issue>9</issue><fpage>2221</fpage><lpage>2230</lpage><pub-id pub-id-type="doi">10.1038/s41409-021-01298-7</pub-id><pub-id pub-id-type="medline">33958740</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Giffen</surname><given-names>CA</given-names> </name><name name-style="western"><surname>Carroll</surname><given-names>LE</given-names> </name><name name-style="western"><surname>Adams</surname><given-names>JT</given-names> </name><name name-style="western"><surname>Brennan</surname><given-names>SP</given-names> </name><name name-style="western"><surname>Coady</surname><given-names>SA</given-names> </name><name name-style="western"><surname>Wagner</surname><given-names>EL</given-names> </name></person-group><article-title>Providing contemporary access to historical biospecimen collections: Development of the NHLBI Biologic Specimen and Data Repository 
Information Coordinating Center (BioLINCC)</article-title><source>Biopreserv Biobank</source><year>2015</year><month>08</month><volume>13</volume><issue>4</issue><fpage>271</fpage><lpage>279</lpage><pub-id pub-id-type="doi">10.1089/bio.2014.0050</pub-id><pub-id pub-id-type="medline">26186276</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hong</surname><given-names>F</given-names> </name><name name-style="western"><surname>Tian</surname><given-names>L</given-names> </name><name name-style="western"><surname>Devanarayan</surname><given-names>V</given-names> </name></person-group><article-title>Improving the robustness of variable selection and predictive performance of regularized generalized linear models and Cox proportional hazard models</article-title><source>Mathematics</source><year>2023</year><month>02</month><volume>11</volume><issue>3</issue><fpage>557</fpage><pub-id pub-id-type="doi">10.3390/math11030557</pub-id><pub-id pub-id-type="medline">37990696</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jain</surname><given-names>R</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>W</given-names> </name></person-group><article-title>Artificial Intelligence based wrapper for high dimensional feature selection</article-title><source>BMC Bioinformatics</source><year>2023</year><month>10</month><day>18</day><volume>24</volume><issue>1</issue><fpage>392</fpage><pub-id pub-id-type="doi">10.1186/s12859-023-05502-x</pub-id><pub-id pub-id-type="medline">37853338</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ruisen</surname><given-names>L</given-names> 
</name><name name-style="western"><surname>Songyi</surname><given-names>D</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>W</given-names> </name><etal/></person-group><article-title>Bagging of XGBoost classifiers with random under-sampling and Tomek link for noisy label-imbalanced data</article-title><source>IOP Conf Ser: Mater Sci Eng</source><volume>428</volume><issue>1</issue><fpage>012004</fpage><pub-id pub-id-type="doi">10.1088/1757-899X/428/1/012004</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Van Calster</surname><given-names>B</given-names> </name><name name-style="western"><surname>McLernon</surname><given-names>DJ</given-names> </name><name name-style="western"><surname>van Smeden</surname><given-names>M</given-names> </name><name name-style="western"><surname>Wynants</surname><given-names>L</given-names> </name><name name-style="western"><surname>Steyerberg</surname><given-names>EW</given-names> </name><collab>Topic Group &#x2018;Evaluating diagnostic tests and prediction models&#x2019; of the STRATOS initiative</collab></person-group><article-title>Calibration: the Achilles heel of predictive analytics</article-title><source>BMC Med</source><year>2019</year><month>12</month><day>16</day><volume>17</volume><issue>1</issue><fpage>230</fpage><pub-id pub-id-type="doi">10.1186/s12916-019-1466-7</pub-id><pub-id pub-id-type="medline">31842878</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>van der Laan</surname><given-names>L</given-names> </name><name name-style="western"><surname>Ulloa-P&#x00E9;rez</surname><given-names>E</given-names> </name><name name-style="western"><surname>Carone</surname><given-names>M</given-names> </name><name 
name-style="western"><surname>Luedtke</surname><given-names>A</given-names> </name></person-group><article-title>Causal isotonic calibration for heterogeneous treatment effects</article-title><source>Proc Mach Learn Res</source><year>2023</year><month>07</month><volume>202</volume><fpage>34831</fpage><lpage>34854</lpage><pub-id pub-id-type="medline">37575467</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Pozzolo</surname><given-names>AD</given-names> </name><name name-style="western"><surname>Caelen</surname><given-names>O</given-names> </name><name name-style="western"><surname>Johnson</surname><given-names>RA</given-names> </name><name name-style="western"><surname>Bontempi</surname><given-names>G</given-names> </name></person-group><article-title>Calibrating probability with undersampling for unbalanced classification</article-title><conf-name>2015 IEEE Symposium Series on Computational Intelligence (SSCI)</conf-name><conf-date>Dec 7-10, 2015</conf-date><conf-loc>Cape Town, South Africa</conf-loc><pub-id pub-id-type="doi">10.1109/SSCI.2015.33</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>El Emam</surname><given-names>K</given-names> </name><name name-style="western"><surname>Leung</surname><given-names>TI</given-names> </name><name name-style="western"><surname>Malin</surname><given-names>B</given-names> </name><name name-style="western"><surname>Klement</surname><given-names>W</given-names> </name><name name-style="western"><surname>Eysenbach</surname><given-names>G</given-names> </name></person-group><article-title>Consolidated Reporting Guidelines for Prognostic and Diagnostic Machine Learning Models (CREMLS)</article-title><source>J Med Internet 
Res</source><year>2024</year><month>05</month><day>2</day><volume>26</volume><fpage>e52508</fpage><pub-id pub-id-type="doi">10.2196/52508</pub-id><pub-id pub-id-type="medline">38696776</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Bouckaert</surname><given-names>RR</given-names> </name><name name-style="western"><surname>Frank</surname><given-names>E</given-names> </name></person-group><source>Evaluating the Replicability of Significance Tests for Comparing Learning Algorithms</source><year>2004</year><publisher-name>Berlin, Heidelberg: Springer</publisher-name></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nadeau</surname><given-names>C</given-names> </name><name name-style="western"><surname>Bengio</surname><given-names>Y</given-names> </name></person-group><article-title>Inference for the generalization error</article-title><source>Mach Learn</source><year>2003</year><month>09</month><volume>52</volume><issue>3</issue><fpage>239</fpage><lpage>281</lpage><pub-id pub-id-type="doi">10.1023/A:1024068626366</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Taheriyan</surname><given-names>M</given-names> </name><name name-style="western"><surname>Safaee Nodehi</surname><given-names>S</given-names> </name><name name-style="western"><surname>Niakan Kalhori</surname><given-names>SR</given-names> </name><name name-style="western"><surname>Mohammadzadeh</surname><given-names>N</given-names> </name></person-group><article-title>A systematic review of the predicted outcomes related to hematopoietic stem cell transplantation: focus on applied machine learning methods&#x2019; 
performance</article-title><source>Expert Rev Hematol</source><year>2022</year><month>02</month><day>1</day><volume>15</volume><issue>2</issue><fpage>137</fpage><lpage>156</lpage><pub-id pub-id-type="doi">10.1080/17474086.2022.2042248</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cappelli</surname><given-names>B</given-names> </name><name name-style="western"><surname>Volt</surname><given-names>F</given-names> </name><name name-style="western"><surname>Tozatto-Maio</surname><given-names>K</given-names> </name><etal/></person-group><article-title>Risk factors and outcomes according to age at transplantation with an HLA-identical sibling for sickle cell disease</article-title><source>Haematologica</source><year>2019</year><month>12</month><volume>104</volume><issue>12</issue><fpage>e543</fpage><lpage>e546</lpage><pub-id pub-id-type="doi">10.3324/haematol.2019.216788</pub-id><pub-id pub-id-type="medline">31018975</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bola&#x00F1;os-Meade</surname><given-names>J</given-names> </name><name name-style="western"><surname>Cooke</surname><given-names>KR</given-names> </name><name name-style="western"><surname>Gamper</surname><given-names>CJ</given-names> </name><etal/></person-group><article-title>Effect of increased dose of total body irradiation on graft failure associated with HLA-haploidentical transplantation in patients with severe haemoglobinopathies: a prospective clinical trial</article-title><source>Lancet Haematol</source><year>2019</year><month>04</month><volume>6</volume><issue>4</issue><fpage>e183</fpage><lpage>e193</lpage><pub-id pub-id-type="doi">10.1016/S2352-3026(19)30031-6</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bola&#x00F1;os-Meade</surname><given-names>J</given-names> </name><name name-style="western"><surname>Brodsky</surname><given-names>RA</given-names> </name></person-group><article-title>Blood and marrow transplantation for sickle cell disease: is less more?</article-title><source>Blood Rev</source><year>2014</year><month>11</month><volume>28</volume><issue>6</issue><fpage>243</fpage><lpage>248</lpage><pub-id pub-id-type="doi">10.1016/j.blre.2014.08.001</pub-id><pub-id pub-id-type="medline">25217413</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Buczak</surname><given-names>P</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>JJ</given-names> </name><name name-style="western"><surname>Pauly</surname><given-names>M</given-names> </name></person-group><article-title>Analyzing the effect of imputation on classification performance under MCAR and MAR missing mechanisms</article-title><source>Entropy (Basel)</source><year>2023</year><month>03</month><day>17</day><volume>25</volume><issue>3</issue><fpage>521</fpage><pub-id pub-id-type="doi">10.3390/e25030521</pub-id><pub-id pub-id-type="medline">36981409</pub-id></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tang</surname><given-names>F</given-names> </name><name name-style="western"><surname>Ishwaran</surname><given-names>H</given-names> </name></person-group><article-title>Random forest missing data algorithms</article-title><source>Stat Anal Data Min</source><year>2017</year><month>12</month><volume>10</volume><issue>6</issue><fpage>363</fpage><lpage>377</lpage><comment><ext-link ext-link-type="uri" 
xlink:href="https://onlinelibrary.wiley.com/toc/19321872/10/6">https://onlinelibrary.wiley.com/toc/19321872/10/6</ext-link></comment><pub-id pub-id-type="doi">10.1002/sam.11348</pub-id><pub-id pub-id-type="medline">29403567</pub-id></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hong</surname><given-names>S</given-names> </name><name name-style="western"><surname>Lynn</surname><given-names>HS</given-names> </name></person-group><article-title>Accuracy of random-forest-based imputation of missing data in the presence of non-normality, non-linearity, and interaction</article-title><source>BMC Med Res Methodol</source><year>2020</year><month>07</month><day>25</day><volume>20</volume><issue>1</issue><fpage>199</fpage><pub-id pub-id-type="doi">10.1186/s12874-020-01080-1</pub-id><pub-id pub-id-type="medline">32711455</pub-id></nlm-citation></ref><ref id="ref49"><label>49</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gluckman</surname><given-names>E</given-names> </name><name name-style="western"><surname>Fuente</surname><given-names>J de la</given-names> </name><name name-style="western"><surname>Cappelli</surname><given-names>B</given-names> </name><etal/></person-group><article-title>The role of HLA matching in unrelated donor hematopoietic stem cell transplantation for sickle cell disease in Europe</article-title><source>Bone Marrow Transplant</source><year>2020</year><month>10</month><volume>55</volume><issue>10</issue><fpage>1946</fpage><lpage>1954</lpage><pub-id pub-id-type="doi">10.1038/s41409-020-0847-z</pub-id><pub-id pub-id-type="medline">32157246</pub-id></nlm-citation></ref><ref id="ref50"><label>50</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Meier</surname><given-names>ER</given-names> 
</name><name name-style="western"><surname>Dioguardi</surname><given-names>JV</given-names> </name><name name-style="western"><surname>Kamani</surname><given-names>N</given-names> </name></person-group><article-title>Current attitudes of parents and patients toward hematopoietic stem cell transplantation for sickle cell anemia</article-title><source>Pediatr Blood Cancer</source><year>2015</year><month>07</month><volume>62</volume><issue>7</issue><fpage>1277</fpage><lpage>1284</lpage><pub-id pub-id-type="doi">10.1002/pbc.25446</pub-id><pub-id pub-id-type="medline">25809231</pub-id></nlm-citation></ref><ref id="ref51"><label>51</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sinha</surname><given-names>CB</given-names> </name><name name-style="western"><surname>Meacham</surname><given-names>LR</given-names> </name><name name-style="western"><surname>Bakshi</surname><given-names>N</given-names> </name><name name-style="western"><surname>Ross</surname><given-names>D</given-names> </name><name name-style="western"><surname>Krishnamurti</surname><given-names>L</given-names> </name></person-group><article-title>Parental perspective on the risk of infertility and fertility preservation options for children and adolescents with sickle cell disease considering hematopoietic stem cell transplantation</article-title><source>Pediatr Blood Cancer</source><year>2023</year><month>07</month><volume>70</volume><issue>7</issue><fpage>e30276</fpage><pub-id pub-id-type="doi">10.1002/pbc.30276</pub-id><pub-id pub-id-type="medline">37051746</pub-id></nlm-citation></ref><ref id="ref52"><label>52</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Greenes</surname><given-names>RA</given-names> </name><name name-style="western"><surname>Bates</surname><given-names>DW</given-names> </name><name 
name-style="western"><surname>Kawamoto</surname><given-names>K</given-names> </name><name name-style="western"><surname>Middleton</surname><given-names>B</given-names> </name><name name-style="western"><surname>Osheroff</surname><given-names>J</given-names> </name><name name-style="western"><surname>Shahar</surname><given-names>Y</given-names> </name></person-group><article-title>Clinical decision support models and frameworks: seeking to address research issues underlying implementation successes and failures</article-title><source>J Biomed Inform</source><year>2018</year><month>02</month><volume>78</volume><fpage>134</fpage><lpage>143</lpage><pub-id pub-id-type="doi">10.1016/j.jbi.2017.12.005</pub-id><pub-id pub-id-type="medline">29246790</pub-id></nlm-citation></ref><ref id="ref53"><label>53</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Marcial</surname><given-names>LH</given-names> </name><name name-style="western"><surname>Richardson</surname><given-names>JE</given-names> </name><name name-style="western"><surname>Lasater</surname><given-names>B</given-names> </name><etal/></person-group><article-title>The imperative for patient&#x2011;centered clinical decision support</article-title><source>EGEMS (Wash DC)</source><year>2018</year><month>05</month><day>30</day><volume>6</volume><issue>1</issue><fpage>12</fpage><pub-id pub-id-type="doi">10.5334/egems.259</pub-id><pub-id pub-id-type="medline">30094284</pub-id></nlm-citation></ref><ref id="ref54"><label>54</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Krishnamurti</surname><given-names>L</given-names> </name><name name-style="western"><surname>Ross</surname><given-names>D</given-names> </name><name name-style="western"><surname>Sinha</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Comparative effectiveness 
of a web-based patient decision aid for therapeutic options for sickle cell disease: randomized controlled trial</article-title><source>J Med Internet Res</source><year>2019</year><month>12</month><day>4</day><volume>21</volume><issue>12</issue><fpage>e14462</fpage><pub-id pub-id-type="doi">10.2196/14462</pub-id><pub-id pub-id-type="medline">31799940</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Feature distribution of the dataset.</p><media xlink:href="ai_v4i1e64519_app1.docx" xlink:title="DOCX File, 21 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Outcome distribution.</p><media xlink:href="ai_v4i1e64519_app2.docx" xlink:title="DOCX File, 15 KB"/></supplementary-material><supplementary-material id="app3"><label>Multimedia Appendix 3</label><p>Mean absolute Shapley additive explanations values quantifying predictor importance for acute graft-versus-host disease. Values averaged across cross-validation folds and bootstraps.</p><media xlink:href="ai_v4i1e64519_app3.pdf" xlink:title="PDF File, 77 KB"/></supplementary-material><supplementary-material id="app4"><label>Multimedia Appendix 4</label><p>Mean absolute Shapley additive explanations values quantifying predictor importance for graft failure. Values averaged across cross-validation folds and bootstraps.</p><media xlink:href="ai_v4i1e64519_app4.pdf" xlink:title="PDF File, 168 KB"/></supplementary-material><supplementary-material id="app5"><label>Multimedia Appendix 5</label><p>Mean absolute Shapley additive explanations values quantifying predictor importance for event-free survival. 
Values averaged across cross-validation folds and bootstraps.</p><media xlink:href="ai_v4i1e64519_app5.pdf" xlink:title="PDF File, 162 KB"/></supplementary-material><supplementary-material id="app6"><label>Multimedia Appendix 6</label><p>Mean absolute Shapley additive explanations values quantifying predictor importance for chronic graft-versus-host disease. Values averaged across cross-validation folds and bootstraps.</p><media xlink:href="ai_v4i1e64519_app6.pdf" xlink:title="PDF File, 156 KB"/></supplementary-material><supplementary-material id="app7"><label>Multimedia Appendix 7</label><p>Mean absolute Shapley additive explanations values quantifying predictor importance for overall survival. Values averaged across cross-validation folds and bootstraps.</p><media xlink:href="ai_v4i1e64519_app7.pdf" xlink:title="PDF File, 58498 KB"/></supplementary-material><supplementary-material id="app8"><label>Multimedia Appendix 8</label><p>Predicted probability percentage outcomes for each of the hypothetical patient profiles.</p><media xlink:href="ai_v4i1e64519_app8.docx" xlink:title="DOCX File, 13 KB"/></supplementary-material><supplementary-material id="app9"><label>Multimedia Appendix 9</label><p>Combinations of conditioning regimen, serotherapy, and graft-versus-host disease prophylaxis used in published case series, which are included in the SPRIGHT.</p><media xlink:href="ai_v4i1e64519_app9.docx" xlink:title="DOCX File, 14 KB"/></supplementary-material><supplementary-material id="app10"><label>Checklist 1</label><p>Consolidated Reporting of Machine Learning Studies checklist.</p><media xlink:href="ai_v4i1e64519_app10.pdf" xlink:title="PDF File, 115 KB"/></supplementary-material></app-group></back></article>