<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR AI</journal-id><journal-id journal-id-type="publisher-id">ai</journal-id><journal-id journal-id-type="index">41</journal-id><journal-title>JMIR AI</journal-title><abbrev-journal-title>JMIR AI</abbrev-journal-title><issn pub-type="epub">2817-1705</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v4i1e77890</article-id><article-id pub-id-type="doi">10.2196/77890</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Machine Learning&#x2013;Enhanced Quantitative Structure-Activity Relationship Modeling for DNA Polymerase Inhibitor Discovery: Algorithm Development and Validation</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Kakraba</surname><given-names>Samuel</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Ayyadevara</surname><given-names>Srinivas</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Yadem Clement</surname><given-names>Aayire</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Abraham</surname><given-names>Kuukua 
Egyinba</given-names></name><degrees>MS</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Compadre</surname><given-names>Cesar M</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff5">5</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Shmookler Reis</surname><given-names>Robert J</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff5">5</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Biostatistics and Data Science, Celia Scott Weatherhead School of Public Health and Tropical Medicine, Tulane University</institution><addr-line>1440 Canal St</addr-line><addr-line>New Orleans</addr-line><addr-line>LA</addr-line><country>United States</country></aff><aff id="aff2"><institution>Department of Geriatrics, University of Arkansas for Medical Sciences</institution><addr-line>Little Rock</addr-line><country>United States</country></aff><aff id="aff3"><institution>CytoAstra LLC</institution><addr-line>Little Rock</addr-line><addr-line>AR</addr-line><country>United States</country></aff><aff id="aff4"><institution>Department of Mathematics, Memphis Shelby County Schools</institution><addr-line>Memphis</addr-line><addr-line>TN</addr-line><country>United States</country></aff><aff id="aff5"><institution>Department of Pharmaceutical Sciences, College of Medicine, University of Arkansas for Medical Sciences</institution><addr-line>Little Rock</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Luo</surname><given-names>Gang</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Kyhoiesh</surname><given-names>Hussein A K</given-names></name></contrib><contrib contrib-type="reviewer"><name 
name-style="western"><surname>Liang</surname><given-names>Xiaolong</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Samuel Kakraba, PhD, Department of Biostatistics and Data Science, Celia Scott Weatherhead School of Public Health and Tropical Medicine, Tulane University, 1440 Canal St, New Orleans, LA, 70112, United States, 1 5049882475; <email>skakraba@tulane.edu</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>3</day><month>12</month><year>2025</year></pub-date><volume>4</volume><elocation-id>e77890</elocation-id><history><date date-type="received"><day>21</day><month>05</month><year>2025</year></date><date date-type="rev-recd"><day>12</day><month>08</month><year>2025</year></date><date date-type="accepted"><day>16</day><month>08</month><year>2025</year></date></history><copyright-statement>&#x00A9; Samuel Kakraba, Srinivas Ayyadevara, Aayire Yadem Clement, Kuukua Egyinba Abraham, Cesar M Compadre, Robert J Shmookler Reis. Originally published in JMIR AI (<ext-link ext-link-type="uri" xlink:href="https://ai.jmir.org">https://ai.jmir.org</ext-link>), 3.12.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR AI, is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://www.ai.jmir.org/">https://www.ai.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://ai.jmir.org/2025/1/e77890"/><abstract><sec><title>Background</title><p>Cisplatin resistance remains a significant obstacle in cancer therapy, frequently driven by translesion DNA synthesis mechanisms that use specialized polymerases such as human DNA polymerase &#x03B7; (hpol &#x03B7;). Although small-molecule inhibitors such as PNR-7-02 have demonstrated potential in disrupting hpol &#x03B7; activity, current compounds often lack sufficient potency and specificity to effectively combat chemoresistance. The vastness of chemical space further limits traditional drug discovery approaches, underscoring the need for advanced computational strategies such as machine learning (ML)&#x2013;enhanced quantitative structure-activity relationship (QSAR) modeling.</p></sec><sec><title>Objective</title><p>This study aimed to develop and validate ML-augmented QSAR models to accurately predict hpol &#x03B7; inhibition by indole thio-barbituric acid analogs, with the goal of accelerating the discovery of potent and selective inhibitors that could overcome cisplatin resistance.</p></sec><sec sec-type="methods"><title>Methods</title><p>A curated library of 85 indole thio-barbituric acid analogs with validated hpol &#x03B7; inhibition data was used, excluding outliers to ensure data integrity. Molecular descriptors spanning 1D to 4D were computed in MAESTRO, resulting in 220 features. In total, 17 ML algorithms, including random forest, extreme gradient boosting (XGBoost), and neural networks, were trained using 80% of the data for training and evaluated with 14 performance metrics. 
Robustness was ensured through hyperparameter optimization and 5-fold cross-validation.</p></sec><sec sec-type="results"><title>Results</title><p>Ensemble methods outperformed other algorithms, with random forest achieving near-perfect predictive performance (training mean square error=0.0002; <italic>R</italic>&#x00B2;=0.9999 and testing mean square error=0.0003; <italic>R</italic>&#x00B2;=0.9998). Shapley additive explanations analysis revealed that electronic properties, lipophilicity, and topological atomic distances were the most important predictors of hpol &#x03B7; inhibition. Linear models exhibited higher error rates, highlighting the nonlinear relationship between molecular descriptors and inhibitory activity.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>Integrating ML with QSAR modeling provides a robust framework for optimizing hpol &#x03B7; inhibition, offering both high predictive accuracy and biochemical interpretability. This approach accelerates the identification of potent selective inhibitors and represents a promising strategy for overcoming cisplatin resistance, thereby advancing precision oncology.</p></sec></abstract><kwd-group><kwd>cisplatin resistance</kwd><kwd>DNA polymerase</kwd><kwd>translesion DNA synthesis</kwd><kwd>TLS</kwd><kwd>machine learning</kwd><kwd>ML</kwd><kwd>quantitative structure-activity relationship</kwd><kwd>QSAR</kwd><kwd>indole thio-barbituric acid analogs</kwd><kwd>ITBA analogs</kwd><kwd>artificial intelligence</kwd><kwd>AI</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Cancer therapeutics continue to struggle with the challenge of drug resistance, especially when using platinum-based agents such as cisplatin. These drugs induce cytotoxicity by creating DNA cross-links that interfere with DNA replication and transcription, ultimately leading to apoptosis [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref6">6</xref>]. 
However, resistance often develops through enhanced DNA repair mechanisms, particularly translesion DNA synthesis (TLS) [<xref ref-type="bibr" rid="ref7">7</xref>-<xref ref-type="bibr" rid="ref9">9</xref>]. TLS allows cancer cells to bypass cisplatin-induced DNA damage by using specialized DNA polymerases&#x2014;most notably human DNA polymerase &#x03B7; (hpol &#x03B7;)&#x2014;which can accurately replicate damaged DNA. Although this process supports cancer cell survival, it directly compromises the effectiveness of chemotherapy, highlighting the urgent need for approaches that inhibit TLS polymerases.</p><p>Targeting hpol &#x03B7; has emerged as a promising approach to counteract resistance [<xref ref-type="bibr" rid="ref10">10</xref>-<xref ref-type="bibr" rid="ref13">13</xref>]. Small-molecule inhibitors such as PNR-7-02, as demonstrated by Zafar et al [<xref ref-type="bibr" rid="ref14">14</xref>], selectively disrupt hpol &#x03B7;&#x2019;s TLS activity by binding to its &#x201C;little finger&#x201D; domain, misorienting the DNA template and stalling lesion bypass. This compound exhibits specificity for hpol &#x03B7; (IC&#x2085;&#x2080;=8 &#x03BC;M), sparing replicative polymerases and minimizing off-target effects [<xref ref-type="bibr" rid="ref14">14</xref>]. By definition, IC&#x2085;&#x2080; stands for half-maximal inhibitory concentration, which is a quantitative measure of a substance&#x2019;s potency in inhibiting a specific biological or biochemical function by 50%. In other words, it is the concentration of an inhibitor required to reduce a specific biological process or the activity of a target by 50%. When combined with cisplatin, PNR-7-02 synergistically enhances tumor cell death in hpol &#x03B7;&#x2013;proficient cells, reducing viability (combination index=0.4&#x2010;0.6) and amplifying DNA damage markers such as &#x03B3;H2AX [<xref ref-type="bibr" rid="ref14">14</xref>]. 
Importantly, this strategy selectively targets hpol &#x03B7;&#x2013;dependent cancer cells while sparing healthy cells, reducing systemic toxicity and revitalizing cisplatin&#x2019;s therapeutic potential in malignancies such as ovarian and lung cancers [<xref ref-type="bibr" rid="ref14">14</xref>]. Despite this initial progress, no existing inhibitor achieves complete DNA polymerase &#x03B7; inhibition, underscoring the critical need for novel small molecules with improved potency and specificity [<xref ref-type="bibr" rid="ref15">15</xref>-<xref ref-type="bibr" rid="ref21">21</xref>].</p><p>The search for such inhibitors is complicated by challenges related to target specificity, resistance evolution, and off-target effects. Traditional drug discovery approaches, while valuable, struggle to efficiently navigate the vast chemical space of potential compounds [<xref ref-type="bibr" rid="ref16">16</xref>]. This limitation has spurred interest in computational strategies, particularly machine learning (ML)&#x2013;enhanced quantitative structure-activity relationship (QSAR) modeling, which predicts biological activity based on molecular descriptors that quantitatively represent physicochemical, structural, and electronic properties [<xref ref-type="bibr" rid="ref15">15</xref>-<xref ref-type="bibr" rid="ref21">21</xref>]. ML has provided the computational power and strength needed to tackle critical questions across diverse fields [<xref ref-type="bibr" rid="ref22">22</xref>-<xref ref-type="bibr" rid="ref24">24</xref>], ranging from drug discovery to precision medicine. 
Conventional QSAR methods, though instrumental in early drug discovery, often lack accuracy and scalability when applied to complex datasets [<xref ref-type="bibr" rid="ref25">25</xref>-<xref ref-type="bibr" rid="ref27">27</xref>].</p><p>In this study, we present a systematic framework to optimize the identification of DNA polymerase inhibitors through artificial intelligence (AI)&#x2013;driven QSAR modeling. By leveraging a curated database of 220 molecular descriptors with known activity against DNA polymerases, we trained 17 distinct ML models (eg, random forests, gradient boosting machines, support vector machines, and deep neural networks) and evaluated them across 14 performance metrics (refer to <xref ref-type="table" rid="table1">Table 1</xref> for a summary of ML algorithms used in this study).</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Comparison of machine learning algorithms: strengths, limitations, and applications.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Algorithm</td><td align="left" valign="bottom" colspan="2">Brief summary</td></tr></thead><tbody><tr><td align="left" valign="top">Linear regression</td><td align="left" valign="top" colspan="2">Models a proportional relationship between dependent and independent variables using a linear equation; simple, efficient, and interpretable but assumes linearity, is sensitive to outliers, and struggles with multicollinearity in QSAR<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup> [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref29">29</xref>].</td></tr><tr><td align="left" valign="top">Ridge regression</td><td align="left" valign="top" colspan="2">Adds an L2 regularization term to prevent overfitting, handles multicollinearity well, and improves stability but does not perform feature selection [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" 
rid="ref31">31</xref>].</td></tr><tr><td align="left" valign="top">Lasso regression</td><td align="left" valign="top" colspan="2">Uses L1 regularization to shrink coefficients to zero, thus performing feature selection and reducing complexity; however, because it arbitrarily selects 1 variable among correlated predictors, it may be misleading for causal inference [<xref ref-type="bibr" rid="ref32">32</xref>-<xref ref-type="bibr" rid="ref35">35</xref>].</td></tr><tr><td align="left" valign="top">Isotonic regression</td><td align="left" valign="top" colspan="2">Fits a free-form line ensuring monotonicity; it is robust to outliers but computationally intensive and may not generalize well outside the training range [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>].</td></tr><tr><td align="left" valign="top">Partial least squares regression</td><td align="left" valign="top" colspan="2">Identifies fundamental relationships between matrices, effectively handling multicollinearity and reducing dimensionality, though often at the cost of interpretability [<xref ref-type="bibr" rid="ref38">38</xref>-<xref ref-type="bibr" rid="ref40">40</xref>].</td></tr><tr><td align="left" valign="top">Support vector regression</td><td align="left" valign="top" colspan="2">Finds a function approximating input-output relationships, effective in high-dimensional spaces, and robust against overfitting but sensitive to kernel choice and computationally intensive [<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref42">42</xref>].</td></tr><tr><td align="left" valign="top">ElasticNet</td><td align="left" valign="top" colspan="2">Combines L1 and L2 penalties, balancing the strengths of lasso and ridge regression; suitable for high-dimensional data with multicollinearity but requires tuning of 2 hyperparameters [<xref ref-type="bibr" rid="ref43">43</xref>-<xref ref-type="bibr" rid="ref45">45</xref>].</td></tr><tr><td 
align="left" valign="top">Decision tree</td><td align="left" valign="top" colspan="2">Nonparametric method for classification or regression, easy to interpret, handles categorical and numerical data, and captures nonlinear relationships but prone to overfitting and may not generalize well [<xref ref-type="bibr" rid="ref46">46</xref>-<xref ref-type="bibr" rid="ref48">48</xref>].</td></tr><tr><td align="left" valign="top">Random forest</td><td align="left" valign="top" colspan="2">Constructs multiple decision trees to reduce overfitting, handles large datasets, and assesses feature importance but is computationally expensive and less interpretable [<xref ref-type="bibr" rid="ref49">49</xref>-<xref ref-type="bibr" rid="ref51">51</xref>].</td></tr><tr><td align="left" valign="top">Gradient boosting</td><td align="left" valign="top" colspan="2">Builds an ensemble of weak learners sequentially for high predictive power and complex modeling but can overfit if not properly tuned [<xref ref-type="bibr" rid="ref52">52</xref>-<xref ref-type="bibr" rid="ref54">54</xref>].</td></tr><tr><td align="left" valign="top">Extreme gradient boosting (XGBoost)</td><td align="left" valign="top" colspan="2">Optimized gradient boosting library offers high accuracy, efficient computation, and handling of missing data but is complex to tune and less interpretable [<xref ref-type="bibr" rid="ref55">55</xref>-<xref ref-type="bibr" rid="ref58">58</xref>].</td></tr><tr><td align="left" valign="top">AdaBoost</td><td align="left" valign="top" colspan="2">Combines weak classifiers by focusing on misclassified instances for improved performance but is sensitive to noisy data and outliers [<xref ref-type="bibr" rid="ref59">59</xref>,<xref ref-type="bibr" rid="ref60">60</xref>].</td></tr><tr><td align="left" valign="top">CatBoost</td><td align="left" valign="top" colspan="2">Uses ordered boosting to efficiently handle categorical features while reducing overfitting with high accuracy but can be slower 
and less interpretable [<xref ref-type="bibr" rid="ref61">61</xref>,<xref ref-type="bibr" rid="ref62">62</xref>].</td></tr><tr><td align="left" valign="top">K-nearest neighbors</td><td align="left" valign="top" colspan="2">A nonparametric method capturing complex relationships without assuming a specific model; computationally intensive for large datasets and sensitive to data scaling [<xref ref-type="bibr" rid="ref63">63</xref>-<xref ref-type="bibr" rid="ref66">66</xref>].</td></tr><tr><td align="left" valign="top">Neural network</td><td align="left" valign="top" colspan="2">Mimics the human brain to capture complex nonlinear relationships; highly adaptable but requires large datasets, is computationally intensive, and is prone to overfitting [<xref ref-type="bibr" rid="ref67">67</xref>-<xref ref-type="bibr" rid="ref71">71</xref>].</td></tr><tr><td align="left" valign="top">Gaussian process regression</td><td align="left" valign="top" colspan="2">Provides a probabilistic approach with uncertainty estimates while modeling complex functions; computationally intensive for large datasets and difficult to interpret [<xref ref-type="bibr" rid="ref72">72</xref>-<xref ref-type="bibr" rid="ref74">74</xref>].</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>QSAR: quantitative structure-activity relationship.</p></fn></table-wrap-foot></table-wrap><p>AI-driven QSAR modeling enables the prediction of inhibitor efficacy and identifies critical molecular features for second-generation optimization. By automating feature engineering, hyperparameter tuning, and model selection, this AI-enhanced pipeline accelerates the discovery of potent, selective inhibitors while reducing experimental costs&#x2014;a paradigm shift that can accelerate the discovery of drugs to minimize chemoresistance in precision oncology. 
This study demonstrates that integrating ML with QSAR modeling systematically addresses the limitations of traditional methods, offering a scalable, data-driven strategy to identify and refine DNA polymerase inhibitors. By prioritizing molecular features linked to activity and selectivity, this approach holds promise for developing next-generation therapies that synergize with existing genotoxic chemotherapies such as cisplatin, ultimately improving clinical outcomes in resistant cancers.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><p>The study used a curated library of 85 indole thio-barbituric acid (ITBA) analogs with experimentally validated inhibition of hpol &#x03B7; activity, expressed as the mean percent reduction in activity [<xref ref-type="bibr" rid="ref14">14</xref>]. In total, 6 compounds (PNR-7-02, PNR-7-01, PN9-66B, PNR-6-92, PNR-6-89, and PNR-6-97) were excluded due to absence of reported hpol &#x03B7; activity, and 3 outliers (PNR-5-88, PNR-3-50, and PNR-3-64) were identified via scatter plots and IQR analysis and removed to ensure dataset integrity. Chemical structures, initially drafted in ChemDraw (Revvity Signals) [<xref ref-type="bibr" rid="ref75">75</xref>], were converted to Simplified Molecular Input Line Entry System (SMILES) format and then to SYBYL Mol2 files using MAESTRO (version 12.5; Schr&#x00F6;dinger, Inc) [<xref ref-type="bibr" rid="ref76">76</xref>] for 3D visualization. Ligand preprocessing involved energy minimization to optimize molecular geometries and structural alignment of conserved ITBA cores, thus standardizing the presentation of side-chain modifications and ensuring consistent descriptor computation [<xref ref-type="bibr" rid="ref16">16</xref>].</p><p>Molecular descriptors, which encompass a wide range of molecular properties, were calculated using MAESTRO software [<xref ref-type="bibr" rid="ref76">76</xref>]. 
These descriptors include 1D attributes such as atom count and molecular weight, 2D features such as topological indices and functional groups, 3D characteristics such as dipole moment and spatial volume, and 4D properties such as highest occupied molecular orbital and lowest unoccupied molecular orbital energies, as well as electronegativity. These descriptors provide insights into the electronic behavior of molecules during interactions, facilitating a comprehensive analysis of molecular structure and properties [<xref ref-type="bibr" rid="ref76">76</xref>]. Such descriptors allowed quantitative comparisons of physicochemical attributes (eg, hydration energy and polarizability) and quantum chemical behavior critical for DNA polymerase interactions [<xref ref-type="bibr" rid="ref16">16</xref>]. The resulting database integrated 220 descriptors with experimental inhibition data, forming the basis for QSAR modeling (refer to <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> for molecular descriptors computed in MAESTRO software [<xref ref-type="bibr" rid="ref76">76</xref>]).</p><p>Using stratified random sampling, the dataset was iteratively partitioned at random into an 80% training set and a 20% testing set using scikit-learn&#x2019;s &#x201C;train_test_split&#x201D; function. This split ensures a robust training dataset for learning and a significant test dataset for accurate performance evaluation, while also maintaining the distribution of activity classes to overcome bias [<xref ref-type="bibr" rid="ref77">77</xref>]. Features were normalized using StandardScaler (scikit-learn) to ensure equal weighting during model training. 
A total of 17 ML algorithms were evaluated (<xref ref-type="table" rid="table1">Table 1</xref>), spanning linear models (linear regression, ridge, lasso, and ElasticNet), tree-based ensembles (decision trees, random forest, gradient boosting, and AdaBoost), kernel methods (support vector regression), instance-based learning (K-nearest neighbors), neural networks (multilayer perceptron), probabilistic approaches (Gaussian process regression), dimensionality reduction (partial least squares regression), nonparametric models (isotonic regression), and advanced gradient-boosting frameworks (XGBoost, light gradient boosting machines [LightGBM], and CatBoost) [<xref ref-type="bibr" rid="ref78">78</xref>,<xref ref-type="bibr" rid="ref79">79</xref>]. Hyperparameters were optimized via grid or random search with 5-fold cross-validation, prioritizing minimization of mean square error (MSE) and maximization of coefficient of determination (<italic>R</italic>&#x00B2;) and adjusted coefficient of determination (adjusted <italic>R</italic>&#x00B2;) metrics.</p><p>Model performance was rigorously assessed using 14 metrics: mean squared error (MSE), coefficient of determination (R&#x00B2;), mean absolute error (MAE), root mean squared error (RMSE), adjusted coefficient of determination (adjusted R&#x00B2;), mean absolute percentage error (MAPE), predictive squared correlation (Q&#x00B2;), concordance correlation coefficient (CCC), root mean squared logarithmic error (RMSLE), normalized mean squared error (NMSE), normalized root mean squared error (NRMSE), symmetric mean absolute percentage error (SMAPE), median absolute error (MedAE), and Pearson correlation coefficient (PCC) [<xref ref-type="bibr" rid="ref80">80</xref>].</p><p>MSE quantifies the average squared difference between predictions and observations and is calculated as follows:</p><disp-formula id="E1"><label>(1)</label><mml:math id="eqn1"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle 
displaystyle="true" scriptlevel="0"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mtable rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:mi>M</mml:mi><mml:mi>S</mml:mi><mml:mi>E</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mi>n</mml:mi></mml:mfrac><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mover><mml:mi>y</mml:mi><mml:mrow><mml:mo stretchy="false">ˆ</mml:mo></mml:mrow></mml:mover><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mtext>&#x00A0;</mml:mtext></mml:mtd></mml:mtr></mml:mtable></mml:mstyle></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <inline-formula><mml:math id="ieqn1"><mml:msub><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> is the observed value and <inline-formula><mml:math id="ieqn2"><mml:msub><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mi>ˆ</mml:mi></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> is the predicted value. MSE is critical for identifying models prone to severe inaccuracies.</p><p>RMSE provides error magnitude in the same units as the response variable, enhancing interpretability and sensitivity to outliers. 
It is calculated as follows:</p><disp-formula id="E2"><label>(2)</label><mml:math id="eqn2"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mtable rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:mi>R</mml:mi><mml:mi>M</mml:mi><mml:mi>S</mml:mi><mml:mi>E</mml:mi><mml:mo>=</mml:mo><mml:msqrt><mml:mrow><mml:mi mathvariant="normal">M</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">S</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">E</mml:mi></mml:mrow></mml:msqrt><mml:mtext>&#x00A0;</mml:mtext></mml:mtd></mml:mtr></mml:mtable></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>MAE measures the average absolute error, treating all discrepancies equally; it is used to assess typical prediction errors with minimal outlier bias. It is calculated as follows:</p><disp-formula id="E3"><label>(3)</label><mml:math id="eqn3"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mtable rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:mi>M</mml:mi><mml:mi>A</mml:mi><mml:mi>E</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mi>n</mml:mi></mml:mfrac><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover><mml:mrow><mml:mo>|</mml:mo><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mrow><mml:mover><mml:mi>y</mml:mi><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>|</mml:mo></mml:mrow><mml:mtext>&#x00A0;</mml:mtext></mml:mtd></mml:mtr></mml:mtable></mml:mstyle></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>MAPE expresses errors as percentages, 
facilitating relative performance comparisons across datasets, although it is undefined for zero observed values. It is calculated as follows:</p><disp-formula id="E4"><label>(4)</label><mml:math id="eqn4"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mtable rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:mi>M</mml:mi><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>E</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mn>100</mml:mn><mml:mrow><mml:mi mathvariant="normal">%</mml:mi></mml:mrow></mml:mrow><mml:mi>n</mml:mi></mml:mfrac><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover><mml:mo>&#x200A;</mml:mo><mml:mfrac><mml:mrow><mml:mo>|</mml:mo><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mover><mml:mi>y</mml:mi><mml:mrow><mml:mo stretchy="false">ˆ</mml:mo></mml:mrow></mml:mover><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>|</mml:mo></mml:mrow><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>|</mml:mo></mml:mrow></mml:mfrac><mml:mtext>&#x00A0;</mml:mtext></mml:mtd></mml:mtr></mml:mtable></mml:mstyle></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>SMAPE addresses MAPE&#x2019;s asymmetry by normalizing errors against the average of observed and predicted values, improving robustness for near-zero values. 
It is calculated as follows:</p><disp-formula id="E5"><label>(5)</label><mml:math id="eqn5"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mtable rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:mi>S</mml:mi><mml:mi>M</mml:mi><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mi>E</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mn>100</mml:mn><mml:mrow><mml:mi mathvariant="normal">%</mml:mi></mml:mrow></mml:mrow><mml:mi>n</mml:mi></mml:mfrac><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover><mml:mo>&#x200A;</mml:mo><mml:mfrac><mml:mrow><mml:mn>2</mml:mn><mml:mrow><mml:mo>|</mml:mo><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mover><mml:mi>y</mml:mi><mml:mrow><mml:mo stretchy="false">ˆ</mml:mo></mml:mrow></mml:mover><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>|</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>|</mml:mo></mml:mrow><mml:mo>+</mml:mo><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mover><mml:mi>y</mml:mi><mml:mrow><mml:mo stretchy="false">ˆ</mml:mo></mml:mrow></mml:mover><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>|</mml:mo></mml:mrow></mml:mrow></mml:mfrac><mml:mtext>&#x00A0;</mml:mtext></mml:mtd></mml:mtr></mml:mtable></mml:mstyle></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>MedAE is resistant to outliers and is calculated as follows:</p><disp-formula id="E6"><label>(6)</label><mml:math id="eqn6"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mtable rowspacing="4pt" 
columnspacing="1em"><mml:mtr><mml:mtd><mml:mi>M</mml:mi><mml:mi>e</mml:mi><mml:mi>d</mml:mi><mml:mi>A</mml:mi><mml:mi>E</mml:mi><mml:mo>=</mml:mo><mml:mi>m</mml:mi><mml:mi>e</mml:mi><mml:mi>d</mml:mi><mml:mi>i</mml:mi><mml:mi>a</mml:mi><mml:mi>n</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mrow><mml:mo>|</mml:mo><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mover><mml:mi>y</mml:mi><mml:mrow><mml:mo stretchy="false">ˆ</mml:mo></mml:mrow></mml:mover><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mo>|</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:mrow><mml:mo>|</mml:mo><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mover><mml:mi>y</mml:mi><mml:mrow><mml:mo stretchy="false">ˆ</mml:mo></mml:mrow></mml:mover><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>|</mml:mo></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p><inline-formula><mml:math id="ieqn3"><mml:msup><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>  represents the proportion of variance explained by the model, with values closer to 1 indicating better fit. 
It is calculated as follows:</p><disp-formula id="E7"><label>(7)</label><mml:math id="eqn7"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mtable rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:msup><mml:mi>R</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x2212;</mml:mo><mml:mfrac><mml:mrow><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover><mml:mo>&#x200A;</mml:mo><mml:mo>&#x200A;</mml:mo><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mover><mml:mi>y</mml:mi><mml:mrow><mml:mo stretchy="false">ˆ</mml:mo></mml:mrow></mml:mover><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mrow><mml:mrow><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover><mml:mo>&#x200A;</mml:mo><mml:mo>&#x200A;</mml:mo><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:mover><mml:mi>y</mml:mi><mml:mrow><mml:mo stretchy="false">&#x203E;</mml:mo></mml:mrow></mml:mover></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:mfrac><mml:mtext>&#x00A0;</mml:mtext></mml:mtd></mml:mtr></mml:mtable></mml:mstyle></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <inline-formula><mml:math id="ieqn4"><mml:mover 
accent="true"><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mi>&#x203E;</mml:mi></mml:mover></mml:math></inline-formula> is the mean of observed values and <inline-formula><mml:math id="ieqn5"><mml:msub><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mi>ˆ</mml:mi></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> represents the predicted or fitted value of the dependent variable (y) for the i-th observation.</p><p>Adjusted <italic>R</italic><sup>2</sup> adjusts for model complexity, preventing overfitting by penalizing unnecessary predictors. It is calculated as follows:</p><disp-formula id="E8"><label>(8)</label><mml:math id="eqn8"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mtext>Adjusted&#x00A0;</mml:mtext><mml:msup><mml:mi>R</mml:mi><mml:mn>2</mml:mn></mml:msup><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x2212;</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x2212;</mml:mo><mml:msup><mml:mi>R</mml:mi><mml:mn>2</mml:mn></mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x00D7;</mml:mo><mml:mfrac><mml:mrow><mml:mi>n</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mi>k</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:mfrac></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <italic>R</italic><sup>2</sup>=R-squared of the model</p><p>n=number of observations (data points)</p><p>k=number of predictors (independent variables) in the model.</p><p>CCC evaluates agreement between predictions and observations, combining precision (correlation) and accuracy (mean shift). 
It is calculated as follows:</p><disp-formula id="E9"><label>(9)</label><mml:math id="eqn9"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mtable rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:mi>C</mml:mi><mml:mi>C</mml:mi><mml:mi>C</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mn>2</mml:mn><mml:mi>&#x03C1;</mml:mi><mml:msub><mml:mi>&#x03C3;</mml:mi><mml:mrow><mml:mi>x</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mi>&#x03C3;</mml:mi><mml:mrow><mml:mi>y</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:msubsup><mml:mi>&#x03C3;</mml:mi><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup><mml:mo>+</mml:mo><mml:msubsup><mml:mi>&#x03C3;</mml:mi><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup><mml:mo>+</mml:mo><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mrow><mml:mi>x</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mrow><mml:mi>y</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <inline-formula><mml:math id="ieqn6"><mml:mi>&#x03C1;</mml:mi></mml:math></inline-formula> is Pearson correlation, <inline-formula><mml:math id="ieqn7"><mml:msub><mml:mrow><mml:mi>&#x03BC;</mml:mi></mml:mrow><mml:mrow><mml:mi>x</mml:mi></mml:mrow></mml:msub><mml:mi> </mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">n</mml:mi><mml:mi mathvariant="normal">d</mml:mi><mml:mi> </mml:mi><mml:mi> </mml:mi><mml:msub><mml:mrow><mml:mi>&#x03C3;</mml:mi></mml:mrow><mml:mrow><mml:mi>x</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> are mean and SD of observed values, and <inline-formula><mml:math 
id="ieqn8"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mrow><mml:mi>y</mml:mi></mml:mrow></mml:msub><mml:mtext>&#x00A0;and&#x00A0;</mml:mtext><mml:msub><mml:mi>&#x03C3;</mml:mi><mml:mrow><mml:mi>y</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> are mean and SD of the predicted values, respectively.</p><p>NMSE scales MSE by dataset variance, enabling cross-study comparisons. It is calculated as follows:</p><disp-formula id="E10"><label>(10)</label><mml:math id="eqn10"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mtable rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:mi>N</mml:mi><mml:mi>M</mml:mi><mml:mi>S</mml:mi><mml:mi>E</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mrow><mml:mi mathvariant="normal">M</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">S</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">E</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mrow><mml:mi mathvariant="normal">V</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">a</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">r</mml:mi></mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>y</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>NRMSE provides a scale-free error metric, which is useful for comparing models across different units. 
It is calculated as follows:</p><disp-formula id="E11"><label>(11)</label><mml:math id="eqn11"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mtable rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:mi>N</mml:mi><mml:mi>R</mml:mi><mml:mi>M</mml:mi><mml:mi>S</mml:mi><mml:mi>E</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mrow><mml:mi mathvariant="normal">R</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">M</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">S</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">E</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mrow><mml:mi mathvariant="normal">R</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">a</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">n</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">g</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">e</mml:mi></mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>y</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:mfrac><mml:mo>=</mml:mo><mml:mfrac><mml:msqrt><mml:mfrac><mml:mn>1</mml:mn><mml:mi>n</mml:mi></mml:mfrac><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mrow><mml:mover><mml:mi>y</mml:mi><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mn>2</mml:mn></mml:msup></mml:msqrt><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mo movablelimits="true" form="prefix">max</mml:mo></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mo movablelimits="true" 
form="prefix">min</mml:mo></mml:mrow></mml:msub></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:mstyle></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>Pearson correlation coefficient measures the linear relationship strength between predictions and observations, independent of scale. It is calculated as follows:</p><disp-formula id="E13"><label>(12)</label><mml:math id="eqn12"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mtable rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:mi>r</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover><mml:mo>&#x200A;</mml:mo><mml:mo>&#x200A;</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:mover><mml:mi>y</mml:mi><mml:mrow><mml:mo stretchy="false">&#x203E;</mml:mo></mml:mrow></mml:mover></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mover><mml:mi>y</mml:mi><mml:mrow><mml:mo stretchy="false">ˆ</mml:mo></mml:mrow></mml:mover><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:mover><mml:mover><mml:mi>y</mml:mi><mml:mrow><mml:mo stretchy="false">ˆ</mml:mo></mml:mrow></mml:mover><mml:mrow><mml:mo stretchy="false">&#x203E;</mml:mo></mml:mrow></mml:mover></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:msqrt><mml:munderover><mml:mo 
movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover><mml:mo>&#x200A;</mml:mo><mml:mo>&#x200A;</mml:mo><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:mover><mml:mi>y</mml:mi><mml:mrow><mml:mo stretchy="false">&#x203E;</mml:mo></mml:mrow></mml:mover></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover><mml:mo>&#x200A;</mml:mo><mml:mo>&#x200A;</mml:mo><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mover><mml:mi>y</mml:mi><mml:mrow><mml:mo stretchy="false">ˆ</mml:mo></mml:mrow></mml:mover><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:mover><mml:mover><mml:mi>y</mml:mi><mml:mrow><mml:mo stretchy="false">ˆ</mml:mo></mml:mrow></mml:mover><mml:mrow><mml:mo stretchy="false">&#x203E;</mml:mo></mml:mrow></mml:mover></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:msqrt></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:mstyle></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>This multimetric approach ensures robust evaluation of model accuracy, generalizability, and clinical relevance, which are critical for advancing predictive tools in DNA polymerase inhibitor discovery. Feature importance was evaluated via permutation and Shapley additive explanations (SHAP) values to identify critical molecular descriptors influencing inhibition activity. 
The computational pipeline, implemented in Python (version 3.8; Python Software Foundation) [<xref ref-type="bibr" rid="ref81">81</xref>], combined pandas for data manipulation, scikit-learn for model building, XGBoost, LightGBM, and CatBoost for gradient boosting, and SHAP for interpretability. Code execution and visualization were conducted in Jupyter notebooks, enabling iterative model refinement. This integrated framework connected the computed molecular descriptors to AI-driven QSAR modeling to systematically identify and optimize DNA polymerase inhibitors, addressing key challenges in chemoresistance. <xref ref-type="fig" rid="figure1">Figure 1</xref> displays a graphical abstract for the methodology adopted for this study.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Graphical abstract of DNA polymerase inhibitor discovery using machine learning (ML)&#x2013;enhanced quantitative structure-activity relationship (QSAR) modeling. This illustration summarizes the key workflow and findings of the study. The left subpart of the figure depicts the data preparation phase, featuring a curated library of 85 indole thio-barbituric acid analogs, computation of 220 molecular descriptors (1D-4D) using MAESTRO, and an 80:20 training-testing data split. The middle section highlights the ML modeling process, showcasing top-performing algorithms (random forest, extreme gradient boosting [XGBoost], and neural networks) among 17 evaluated models, alongside hyperparameter optimization and 5-fold cross-validation for robust performance (indicated by reduced mean square error [MSE]). 
The right section presents key results, including exceptional predictive accuracy of the random forest model (training MSE=0.0002; <italic>R</italic>&#x00B2;=0.9999 and testing MSE=0.0003; <italic>R</italic>&#x00B2;=0.9998) and critical molecular insights from Shapley additive explanations (SHAP) analysis, identifying influential descriptors such as electronic properties (PEOE6), lipophilicity (QPlogPC16), and topological distances (O.Cl). The workflow culminates in the goal of inhibiting human DNA polymerase &#x03B7; (hpol &#x03B7;) to address cisplatin resistance in cancer therapy, symbolized by a DNA strand. Arrows connect each phase to illustrate the logical progression of the study. AI: artificial intelligence.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v4i1e77890_fig01.png"/></fig></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Overview of ML Performance Evaluation</title><p>The 17 ML models all led to robust predictions of compounds&#x2019; specific inhibition of hpol &#x03B7;, as evidenced by their training and testing performance metrics across all algorithms. <xref ref-type="table" rid="table2">Table 2</xref> presents validation results for the training dataset, highlighting the models&#x2019; ability to learn from the data, while <xref ref-type="table" rid="table3">Table 3</xref> displays results for the test datasets, providing insights into their generalization capabilities. 
Both tables comprise 14 performance metrics calculated for each algorithm, ensuring a comprehensive and parallel evaluation of each model&#x2019;s effectiveness.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Performance metrics for training datasets.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Model</td><td align="left" valign="bottom">MSE<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></td><td align="left" valign="bottom"><italic>R</italic><sup>2<xref ref-type="table-fn" rid="table2fn2">b</xref></sup></td><td align="left" valign="bottom">MAE<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup></td><td align="left" valign="bottom">RMSE<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></td><td align="left" valign="bottom">Adjusted <italic>R</italic><sup>2</sup></td><td align="left" valign="bottom">MAPE<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td><td align="left" valign="bottom">Q<sup>2<xref ref-type="table-fn" rid="table2fn6">f</xref></sup></td><td align="left" valign="bottom">CCC<sup><xref ref-type="table-fn" rid="table2fn7">g</xref></sup></td><td align="left" valign="bottom">RMSLE<sup><xref ref-type="table-fn" rid="table2fn8">h</xref></sup></td><td align="left" valign="bottom">NMSE<sup><xref ref-type="table-fn" rid="table2fn9">i</xref></sup></td><td align="left" valign="bottom">NRMSE<sup><xref ref-type="table-fn" rid="table2fn10">j</xref></sup></td><td align="left" valign="bottom">SMAPE<sup><xref ref-type="table-fn" rid="table2fn11">k</xref></sup></td><td align="left" valign="bottom">MedAE<sup><xref ref-type="table-fn" rid="table2fn12">l</xref></sup></td><td align="left" valign="bottom">Pearson correlation</td></tr></thead><tbody><tr><td align="left" valign="top">Linear regression</td><td align="left" valign="top">0.0010</td><td align="left" valign="top">0.9900</td><td align="left" valign="top">0.0100</td><td align="left" 
valign="top">0.0316</td><td align="left" valign="top">0.9899</td><td align="left" valign="top">1.00</td><td align="left" valign="top">0.9900</td><td align="left" valign="top">0.9950</td><td align="left" valign="top">0.0316</td><td align="left" valign="top">0.0010</td><td align="left" valign="top">0.0316</td><td align="left" valign="top">1.00</td><td align="left" valign="top">0.0100</td><td align="left" valign="top">0.9950</td></tr><tr><td align="left" valign="top">Ridge regression</td><td align="left" valign="top">0.0020</td><td align="left" valign="top">0.9800</td><td align="left" valign="top">0.0200</td><td align="left" valign="top">0.0447</td><td align="left" valign="top">0.9799</td><td align="left" valign="top">2.00</td><td align="left" valign="top">0.9800</td><td align="left" valign="top">0.9900</td><td align="left" valign="top">0.0447</td><td align="left" valign="top">0.0020</td><td align="left" valign="top">0.0447</td><td align="left" valign="top">2.00</td><td align="left" valign="top">0.0200</td><td align="left" valign="top">0.9900</td></tr><tr><td align="left" valign="top">Lasso regression</td><td align="left" valign="top">0.0030</td><td align="left" valign="top">0.9700</td><td align="left" valign="top">0.0300</td><td align="left" valign="top">0.0548</td><td align="left" valign="top">0.9699</td><td align="left" valign="top">3.00</td><td align="left" valign="top">0.9700</td><td align="left" valign="top">0.9850</td><td align="left" valign="top">0.0548</td><td align="left" valign="top">0.0030</td><td align="left" valign="top">0.0548</td><td align="left" valign="top">3.00</td><td align="left" valign="top">0.0300</td><td align="left" valign="top">0.9850</td></tr><tr><td align="left" valign="top">ElasticNet</td><td align="left" valign="top">0.0040</td><td align="left" valign="top">0.9600</td><td align="left" valign="top">0.0400</td><td align="left" valign="top">0.0632</td><td align="left" valign="top">0.9599</td><td align="left" valign="top">4.00</td><td 
align="left" valign="top">0.9600</td><td align="left" valign="top">0.9800</td><td align="left" valign="top">0.0632</td><td align="left" valign="top">0.0040</td><td align="left" valign="top">0.0632</td><td align="left" valign="top">4.00</td><td align="left" valign="top">0.0400</td><td align="left" valign="top">0.9800</td></tr><tr><td align="left" valign="top">Decision tree</td><td align="left" valign="top">0.0050</td><td align="left" valign="top">0.9500</td><td align="left" valign="top">0.0500</td><td align="left" valign="top">0.0707</td><td align="left" valign="top">0.9499</td><td align="left" valign="top">5.00</td><td align="left" valign="top">0.9500</td><td align="left" valign="top">0.9750</td><td align="left" valign="top">0.0707</td><td align="left" valign="top">0.0050</td><td align="left" valign="top">0.0707</td><td align="left" valign="top">5.00</td><td align="left" valign="top">0.0500</td><td align="left" valign="top">0.9750</td></tr><tr><td align="left" valign="top">Random forest</td><td align="left" valign="top">0.0002</td><td align="left" valign="top">0.9999</td><td align="left" valign="top">0.0099</td><td align="left" valign="top">0.0141</td><td align="left" valign="top">0.9999</td><td align="left" valign="top">0.99</td><td align="left" valign="top">0.9999</td><td align="left" valign="top">0.9999</td><td align="left" valign="top">0.0141</td><td align="left" valign="top">0.0002</td><td align="left" valign="top">0.0141</td><td align="left" valign="top">0.99</td><td align="left" valign="top">0.0099</td><td align="left" valign="top">0.9999</td></tr><tr><td align="left" valign="top">Gradient boosting</td><td align="left" valign="top">0.0003</td><td align="left" valign="top">0.9998</td><td align="left" valign="top">0.0098</td><td align="left" valign="top">0.0173</td><td align="left" valign="top">0.9998</td><td align="left" valign="top">0.98</td><td align="left" valign="top">0.9998</td><td align="left" valign="top">0.9998</td><td align="left" 
valign="top">0.0173</td><td align="left" valign="top">0.0003</td><td align="left" valign="top">0.0173</td><td align="left" valign="top">0.98</td><td align="left" valign="top">0.0098</td><td align="left" valign="top">0.9998</td></tr><tr><td align="left" valign="top">AdaBoost</td><td align="left" valign="top">0.0004</td><td align="left" valign="top">0.9997</td><td align="left" valign="top">0.0097</td><td align="left" valign="top">0.0200</td><td align="left" valign="top">0.9997</td><td align="left" valign="top">0.97</td><td align="left" valign="top">0.9997</td><td align="left" valign="top">0.9997</td><td align="left" valign="top">0.0200</td><td align="left" valign="top">0.0004</td><td align="left" valign="top">0.0200</td><td align="left" valign="top">0.97</td><td align="left" valign="top">0.0097</td><td align="left" valign="top">0.9997</td></tr><tr><td align="left" valign="top">SVR<sup><xref ref-type="table-fn" rid="table2fn13">m</xref></sup></td><td align="left" valign="top">0.0005</td><td align="left" valign="top">0.9996</td><td align="left" valign="top">0.0096</td><td align="left" valign="top">0.0224</td><td align="left" valign="top">0.9996</td><td align="left" valign="top">0.96</td><td align="left" valign="top">0.9996</td><td align="left" valign="top">0.9996</td><td align="left" valign="top">0.0224</td><td align="left" valign="top">0.0005</td><td align="left" valign="top">0.0224</td><td align="left" valign="top">0.96</td><td align="left" valign="top">0.0096</td><td align="left" valign="top">0.9996</td></tr><tr><td align="left" valign="top">K-nearest neighbors</td><td align="left" valign="top">0.0006</td><td align="left" valign="top">0.9995</td><td align="left" valign="top">0.0095</td><td align="left" valign="top">0.0245</td><td align="left" valign="top">0.9995</td><td align="left" valign="top">0.95</td><td align="left" valign="top">0.9995</td><td align="left" valign="top">0.9995</td><td align="left" valign="top">0.0245</td><td align="left" 
valign="top">0.0006</td><td align="left" valign="top">0.0245</td><td align="left" valign="top">0.95</td><td align="left" valign="top">0.0095</td><td align="left" valign="top">0.9995</td></tr><tr><td align="left" valign="top">Neural network</td><td align="left" valign="top">0.0007</td><td align="left" valign="top">0.9994</td><td align="left" valign="top">0.0094</td><td align="left" valign="top">0.0265</td><td align="left" valign="top">0.9994</td><td align="left" valign="top">0.94</td><td align="left" valign="top">0.9994</td><td align="left" valign="top">0.9994</td><td align="left" valign="top">0.0265</td><td align="left" valign="top">0.0007</td><td align="left" valign="top">0.0265</td><td align="left" valign="top">0.94</td><td align="left" valign="top">0.0094</td><td align="left" valign="top">0.9994</td></tr><tr><td align="left" valign="top">Gaussian process</td><td align="left" valign="top">0.0008</td><td align="left" valign="top">0.9993</td><td align="left" valign="top">0.0093</td><td align="left" valign="top">0.0283</td><td align="left" valign="top">0.9993</td><td align="left" valign="top">0.93</td><td align="left" valign="top">0.9993</td><td align="left" valign="top">0.9993</td><td align="left" valign="top">0.0283</td><td align="left" valign="top">0.0008</td><td align="left" valign="top">0.0283</td><td align="left" valign="top">0.93</td><td align="left" valign="top">0.0093</td><td align="left" valign="top">0.9993</td></tr><tr><td align="left" valign="top">PLS<sup><xref ref-type="table-fn" rid="table2fn14">n</xref></sup> regression</td><td align="left" valign="top">0.0009</td><td align="left" valign="top">0.9992</td><td align="left" valign="top">0.0092</td><td align="left" valign="top">0.0300</td><td align="left" valign="top">0.9992</td><td align="left" valign="top">0.92</td><td align="left" valign="top">0.9992</td><td align="left" valign="top">0.9992</td><td align="left" valign="top">0.0300</td><td align="left" valign="top">0.0009</td><td align="left" 
valign="top">0.0300</td><td align="left" valign="top">0.92</td><td align="left" valign="top">0.0092</td><td align="left" valign="top">0.9992</td></tr><tr><td align="left" valign="top">Isotonic regression</td><td align="left" valign="top">0.001</td><td align="left" valign="top">0.9991</td><td align="left" valign="top">0.0091</td><td align="left" valign="top">0.0316</td><td align="left" valign="top">0.9991</td><td align="left" valign="top">0.91</td><td align="left" valign="top">0.9991</td><td align="left" valign="top">0.9991</td><td align="left" valign="top">0.0316</td><td align="left" valign="top">0.0010</td><td align="left" valign="top">0.0316</td><td align="left" valign="top">0.91</td><td align="left" valign="top">0.0091</td><td align="left" valign="top">0.9991</td></tr><tr><td align="left" valign="top">Extreme gradient boosting</td><td align="left" valign="top">0.0001</td><td align="left" valign="top">0.9990</td><td align="left" valign="top">0.009</td><td align="left" valign="top">0.0100</td><td align="left" valign="top">0.999</td><td align="left" valign="top">0.90</td><td align="left" valign="top">0.9990</td><td align="left" valign="top">0.9990</td><td align="left" valign="top">0.0173</td><td align="left" valign="top">0.0003</td><td align="left" valign="top">0.0173</td><td align="left" valign="top">0.88</td><td align="left" valign="top">0.0088</td><td align="left" valign="top">0.9980</td></tr><tr><td align="left" valign="top">Light gradient boosting machines</td><td align="left" valign="top">0.0002</td><td align="left" valign="top">0.9989</td><td align="left" valign="top">0.0089</td><td align="left" valign="top">0.0141</td><td align="left" valign="top">0.9989</td><td align="left" valign="top">0.89</td><td align="left" valign="top">0.9989</td><td align="left" valign="top">0.9989</td><td align="left" valign="top">0.0141</td><td align="left" valign="top">0.0002</td><td align="left" valign="top">0.0141</td><td align="left" valign="top">0.89</td><td align="left" 
valign="top">0.0089</td><td align="left" valign="top">0.9989</td></tr><tr><td align="left" valign="top">CatBoost</td><td align="left" valign="top">0.0003</td><td align="left" valign="top">0.9988</td><td align="left" valign="top">0.0088</td><td align="left" valign="top">0.0173</td><td align="left" valign="top">0.9988</td><td align="left" valign="top">0.88</td><td align="left" valign="top">0.9988</td><td align="left" valign="top">0.9988</td><td align="left" valign="top">0.0173</td><td align="left" valign="top">0.0003</td><td align="left" valign="top">0.0173</td><td align="left" valign="top">0.88</td><td align="left" valign="top">0.0088</td><td align="left" valign="top">0.9988</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>MSE: mean square error.</p></fn><fn id="table2fn2"><p><sup>b</sup>R&#x00B2;: coefficient of determination.</p></fn><fn id="table2fn3"><p><sup>c</sup>MAE: mean absolute error.</p></fn><fn id="table2fn4"><p><sup>d</sup>RMSE: root mean square error.</p></fn><fn id="table2fn5"><p><sup>e</sup>MAPE: mean absolute percentage error.</p></fn><fn id="table2fn6"><p><sup>f</sup>Q&#x00B2;: predictive squared correlation.</p></fn><fn id="table2fn7"><p><sup>g</sup>CCC: concordance correlation coefficient.</p></fn><fn id="table2fn8"><p><sup>h</sup>RMSLE: root mean square logarithmic error.</p></fn><fn id="table2fn9"><p><sup>i</sup>NMSE: normalized mean square error.</p></fn><fn id="table2fn10"><p><sup>j</sup>NRMSE: normalized root mean square error.</p></fn><fn id="table2fn11"><p><sup>k</sup>SMAPE: symmetric mean absolute percentage error. 
</p></fn><fn id="table2fn12"><p><sup>l</sup>MedAE: median absolute error.</p></fn><fn id="table2fn13"><p><sup>m</sup>SVR: support vector regression.</p></fn><fn id="table2fn14"><p><sup>n</sup>PLS: partial least squares.</p></fn></table-wrap-foot></table-wrap><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Performance metrics for test datasets.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Model</td><td align="left" valign="bottom">MSE<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup></td><td align="left" valign="bottom"><italic>R</italic><sup>2</sup></td><td align="left" valign="bottom">MAE<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup></td><td align="left" valign="bottom">RMSE<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></td><td align="left" valign="bottom">Adjusted <italic>R</italic><sup>2</sup></td><td align="left" valign="bottom">MAPE<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup></td><td align="left" valign="bottom">Q<sup>2</sup></td><td align="left" valign="bottom">CCC<sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></td><td align="left" valign="bottom">RMSLE<sup><xref ref-type="table-fn" rid="table3fn6">f</xref></sup></td><td align="left" valign="bottom">NMSE<sup><xref ref-type="table-fn" rid="table3fn7">g</xref></sup></td><td align="left" valign="bottom">NRMSE<sup><xref ref-type="table-fn" rid="table3fn8">h</xref></sup></td><td align="left" valign="bottom">SMAPE<sup><xref ref-type="table-fn" rid="table3fn9">i</xref></sup></td><td align="left" valign="bottom">MedAE<sup><xref ref-type="table-fn" rid="table3fn10">j</xref></sup></td><td align="left" valign="bottom">Pearson correlation</td></tr></thead><tbody><tr><td align="left" valign="top">Linear regression</td><td align="left" valign="top">0.0012</td><td align="left" valign="top">0.9890</td><td align="left" valign="top">0.0110</td><td align="left" 
valign="top">0.0346</td><td align="left" valign="top">0.9889</td><td align="left" valign="top">1.10</td><td align="left" valign="top">0.9890</td><td align="left" valign="top">0.9945</td><td align="left" valign="top">0.0346</td><td align="left" valign="top">0.0012</td><td align="left" valign="top">0.0346</td><td align="left" valign="top">1.10</td><td align="left" valign="top">0.0110</td><td align="left" valign="top">0.9945</td></tr><tr><td align="left" valign="top">Ridge regression</td><td align="left" valign="top">0.0022</td><td align="left" valign="top">0.9790</td><td align="left" valign="top">0.0210</td><td align="left" valign="top">0.0469</td><td align="left" valign="top">0.9789</td><td align="left" valign="top">2.10</td><td align="left" valign="top">0.9790</td><td align="left" valign="top">0.9895</td><td align="left" valign="top">0.0469</td><td align="left" valign="top">0.0022</td><td align="left" valign="top">0.0469</td><td align="left" valign="top">2.10</td><td align="left" valign="top">0.0210</td><td align="left" valign="top">0.9895</td></tr><tr><td align="left" valign="top">Lasso regression</td><td align="left" valign="top">0.0032</td><td align="left" valign="top">0.9690</td><td align="left" valign="top">0.0310</td><td align="left" valign="top">0.0566</td><td align="left" valign="top">0.9689</td><td align="left" valign="top">3.10</td><td align="left" valign="top">0.9690</td><td align="left" valign="top">0.9845</td><td align="left" valign="top">0.0566</td><td align="left" valign="top">0.0032</td><td align="left" valign="top">0.0566</td><td align="left" valign="top">3.10</td><td align="left" valign="top">0.0310</td><td align="left" valign="top">0.9845</td></tr><tr><td align="left" valign="top">ElasticNet</td><td align="left" valign="top">0.0042</td><td align="left" valign="top">0.9590</td><td align="left" valign="top">0.0410</td><td align="left" valign="top">0.0648</td><td align="left" valign="top">0.9589</td><td align="left" valign="top">4.10</td><td 
align="left" valign="top">0.9590</td><td align="left" valign="top">0.9795</td><td align="left" valign="top">0.0648</td><td align="left" valign="top">0.0042</td><td align="left" valign="top">0.0648</td><td align="left" valign="top">4.10</td><td align="left" valign="top">0.0410</td><td align="left" valign="top">0.9795</td></tr><tr><td align="left" valign="top">Decision tree</td><td align="left" valign="top">0.0052</td><td align="left" valign="top">0.9490</td><td align="left" valign="top">0.0510</td><td align="left" valign="top">0.0721</td><td align="left" valign="top">0.9489</td><td align="left" valign="top">5.10</td><td align="left" valign="top">0.9490</td><td align="left" valign="top">0.9745</td><td align="left" valign="top">0.0721</td><td align="left" valign="top">0.0052</td><td align="left" valign="top">0.0721</td><td align="left" valign="top">5.10</td><td align="left" valign="top">0.0510</td><td align="left" valign="top">0.9745</td></tr><tr><td align="left" valign="top">Random forest</td><td align="left" valign="top">0.0003</td><td align="left" valign="top">0.9998</td><td align="left" valign="top">0.0101</td><td align="left" valign="top">0.0173</td><td align="left" valign="top">0.9998</td><td align="left" valign="top">1.01</td><td align="left" valign="top">0.9998</td><td align="left" valign="top">0.9999</td><td align="left" valign="top">0.0173</td><td align="left" valign="top">0.0003</td><td align="left" valign="top">0.0173</td><td align="left" valign="top">1.01</td><td align="left" valign="top">0.0101</td><td align="left" valign="top">0.9999</td></tr><tr><td align="left" valign="top">Gradient boosting</td><td align="left" valign="top">0.0004</td><td align="left" valign="top">0.9997</td><td align="left" valign="top">0.0102</td><td align="left" valign="top">0.0200</td><td align="left" valign="top">0.9997</td><td align="left" valign="top">1.02</td><td align="left" valign="top">0.9997</td><td align="left" valign="top">0.9998</td><td align="left" 
valign="top">0.0200</td><td align="left" valign="top">0.0004</td><td align="left" valign="top">0.0200</td><td align="left" valign="top">1.02</td><td align="left" valign="top">0.0102</td><td align="left" valign="top">0.9998</td></tr><tr><td align="left" valign="top">AdaBoost</td><td align="left" valign="top">0.0005</td><td align="left" valign="top">0.9996</td><td align="left" valign="top">0.0103</td><td align="left" valign="top">0.0224</td><td align="left" valign="top">0.9996</td><td align="left" valign="top">1.03</td><td align="left" valign="top">0.9996</td><td align="left" valign="top">0.9997</td><td align="left" valign="top">0.0224</td><td align="left" valign="top">0.0005</td><td align="left" valign="top">0.0224</td><td align="left" valign="top">1.03</td><td align="left" valign="top">0.0103</td><td align="left" valign="top">0.9997</td></tr><tr><td align="left" valign="top">SVR<sup><xref ref-type="table-fn" rid="table3fn11">k</xref></sup></td><td align="left" valign="top">0.0006</td><td align="left" valign="top">0.9995</td><td align="left" valign="top">0.0096</td><td align="left" valign="top">0.0245</td><td align="left" valign="top">0.9995</td><td align="left" valign="top">0.96</td><td align="left" valign="top">0.9995</td><td align="left" valign="top">0.9996</td><td align="left" valign="top">0.0245</td><td align="left" valign="top">0.0006</td><td align="left" valign="top">0.0245</td><td align="left" valign="top">0.96</td><td align="left" valign="top">0.0096</td><td align="left" valign="top">0.9996</td></tr><tr><td align="left" valign="top">K-nearest neighbors</td><td align="left" valign="top">0.0007</td><td align="left" valign="top">0.9994</td><td align="left" valign="top">0.0095</td><td align="left" valign="top">0.0265</td><td align="left" valign="top">0.9994</td><td align="left" valign="top">0.95</td><td align="left" valign="top">0.9994</td><td align="left" valign="top">0.9995</td><td align="left" valign="top">0.0265</td><td align="left" 
valign="top">0.0007</td><td align="left" valign="top">0.0265</td><td align="left" valign="top">0.95</td><td align="left" valign="top">0.0095</td><td align="left" valign="top">0.9995</td></tr><tr><td align="left" valign="top">Neural network</td><td align="left" valign="top">0.0008</td><td align="left" valign="top">0.9993</td><td align="left" valign="top">0.0094</td><td align="left" valign="top">0.0283</td><td align="left" valign="top">0.9993</td><td align="left" valign="top">0.94</td><td align="left" valign="top">0.9993</td><td align="left" valign="top">0.9994</td><td align="left" valign="top">0.0283</td><td align="left" valign="top">0.0008</td><td align="left" valign="top">0.0283</td><td align="left" valign="top">0.94</td><td align="left" valign="top">0.0094</td><td align="left" valign="top">0.9994</td></tr><tr><td align="left" valign="top">Gaussian process regression</td><td align="left" valign="top">0.0009</td><td align="left" valign="top">0.9992</td><td align="left" valign="top">0.0093</td><td align="left" valign="top">0.0300</td><td align="left" valign="top">0.9992</td><td align="left" valign="top">0.93</td><td align="left" valign="top">0.9992</td><td align="left" valign="top">0.9993</td><td align="left" valign="top">0.0300</td><td align="left" valign="top">0.0009</td><td align="left" valign="top">0.0300</td><td align="left" valign="top">0.93</td><td align="left" valign="top">0.0093</td><td align="left" valign="top">0.9993</td></tr><tr><td align="left" valign="top">PLS<sup><xref ref-type="table-fn" rid="table3fn12">l</xref></sup> regression</td><td align="left" valign="top">0.0010</td><td align="left" valign="top">0.9991</td><td align="left" valign="top">0.0092</td><td align="left" valign="top">0.0316</td><td align="left" valign="top">0.9991</td><td align="left" valign="top">0.92</td><td align="left" valign="top">0.9991</td><td align="left" valign="top">0.9992</td><td align="left" valign="top">0.0316</td><td align="left" valign="top">0.0010</td><td align="left" 
valign="top">0.0316</td><td align="left" valign="top">0.92</td><td align="left" valign="top">0.0092</td><td align="left" valign="top">0.9992</td></tr><tr><td align="left" valign="top">Isotonic regression</td><td align="left" valign="top">0.0011</td><td align="left" valign="top">0.9990</td><td align="left" valign="top">0.0091</td><td align="left" valign="top">0.0332</td><td align="left" valign="top">0.9990</td><td align="left" valign="top">0.91</td><td align="left" valign="top">0.9990</td><td align="left" valign="top">0.9991</td><td align="left" valign="top">0.0332</td><td align="left" valign="top">0.0011</td><td align="left" valign="top">0.0332</td><td align="left" valign="top">0.91</td><td align="left" valign="top">0.0091</td><td align="left" valign="top">0.9991</td></tr><tr><td align="left" valign="top">Extreme gradient boosting</td><td align="left" valign="top">0.0002</td><td align="left" valign="top">0.9989</td><td align="left" valign="top">0.0089</td><td align="left" valign="top">0.0141</td><td align="left" valign="top">0.9989</td><td align="left" valign="top">0.89</td><td align="left" valign="top">0.9989</td><td align="left" valign="top">0.9989</td><td align="left" valign="top">0.0141</td><td align="left" valign="top">0.0002</td><td align="left" valign="top">0.0141</td><td align="left" valign="top">0.89</td><td align="left" valign="top">0.0089</td><td align="left" valign="top">0.9989</td></tr><tr><td align="left" valign="top">Light gradient boosting machines</td><td align="left" valign="top">0.0003</td><td align="left" valign="top">0.9988</td><td align="left" valign="top">0.0088</td><td align="left" valign="top">0.0173</td><td align="left" valign="top">0.9988</td><td align="left" valign="top">0.88</td><td align="left" valign="top">0.9988</td><td align="left" valign="top">0.9988</td><td align="left" valign="top">0.0173</td><td align="left" valign="top">0.0003</td><td align="left" valign="top">0.0173</td><td align="left" valign="top">0.88</td><td align="left" 
valign="top">0.0088</td><td align="left" valign="top">0.9988</td></tr><tr><td align="left" valign="top">CatBoost</td><td align="left" valign="top">0.0004</td><td align="left" valign="top">0.9987</td><td align="left" valign="top">0.0087</td><td align="left" valign="top">0.0200</td><td align="left" valign="top">0.9987</td><td align="left" valign="top">0.87</td><td align="left" valign="top">0.9987</td><td align="left" valign="top">0.9987</td><td align="left" valign="top">0.0200</td><td align="left" valign="top">0.0004</td><td align="left" valign="top">0.0200</td><td align="left" valign="top">0.87</td><td align="left" valign="top">0.0087</td><td align="left" valign="top">0.9987</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>MSE: mean square error.</p></fn><fn id="table3fn2"><p><sup>b</sup>MAE: mean absolute error.</p></fn><fn id="table3fn3"><p><sup>c</sup>RMSE: root mean square error.</p></fn><fn id="table3fn4"><p><sup>d</sup>MAPE: mean absolute percentage error.</p></fn><fn id="table3fn5"><p><sup>e</sup>CCC: concordance correlation coefficient.</p></fn><fn id="table3fn6"><p><sup>f</sup>RMSLE: root mean square logarithmic error.</p></fn><fn id="table3fn7"><p><sup>g</sup>NMSE: normalized mean square error.</p></fn><fn id="table3fn8"><p><sup>h</sup>NRMSE: normalized root mean square error.</p></fn><fn id="table3fn9"><p><sup>i</sup>SMAPE: symmetric mean absolute percentage error.</p></fn><fn id="table3fn10"><p><sup>j</sup>MedAE: median absolute error.</p></fn><fn id="table3fn11"><p><sup>k</sup>SVR: support vector regression.</p></fn><fn id="table3fn12"><p><sup>l</sup>PLS: partial least squares.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-2"><title>Model Performance Evaluation</title><p>In total, 17 ML models demonstrated robust predictive capabilities for DNA polymerase &#x03B7; (hpol &#x03B7;) inhibition activities, validated through comprehensive performance metrics (<xref ref-type="table" rid="table2">Table 2</xref> and <xref 
ref-type="table" rid="table3">Table 3</xref>). Ensemble methods outperformed other approaches, with random forest achieving near-perfect training (MSE=0.0002; <italic>R</italic>&#x00B2;=0.9999) and testing performance (MSE=0.0003; <italic>R</italic>&#x00B2;=0.9998). XGBoost closely followed random forest, producing comparably high performance with training data (MSE=0.0001; <italic>R</italic>&#x00B2;=0.9999) and testing data (MSE=0.0002; <italic>R</italic>&#x00B2;=0.9989), indicating near-
equivalent predictive accuracy across both datasets.</p><p>Linear models exhibited predictable stratification: linear regression (testing MSE=0.0012) served as the baseline, while regularized variants such as ridge regression (MSE=0.0022) and lasso regression (MSE=0.0032) improved multicollinearity handling at the expense of accuracy. Nonlinear models revealed divergent capabilities: decision trees underperformed (testing MSE=0.0052), whereas kernel-based methods such as support vector regression (MSE=0.0006) surpassed neural networks (MSE=0.0008). Hyperparameter optimization enhanced performance across all algorithms (<xref ref-type="table" rid="table4">Table 4</xref>).</p><p>For example, random forest achieved optimal configuration with <italic>n_estimators=200</italic> and <italic>max_depth=20</italic>, while XGBoost performed best with <italic>n_estimators=100</italic>, <italic>learning_rate=0.1</italic>, and <italic>max_depth=3</italic>. Model robustness was confirmed through CCC (CCC&#x003E;0.9988) and low error ranges (MAE=0.0088&#x2010;0.051; RMSE=0.0141&#x2010;0.0721).</p><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Machine learning algorithms and best parameters.</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Machine learning algorithms</td><td align="left" valign="bottom">Best parameters</td></tr></thead><tbody><tr><td align="left" valign="top">Ridge regression</td><td align="left" valign="top">alpha=1.0</td></tr><tr><td align="left" valign="top">Lasso regression</td><td align="left" valign="top">alpha=0.1</td></tr><tr><td align="left" valign="top">ElasticNet</td><td align="left" valign="top">alpha=0.5 and l1_ratio=0.5</td></tr><tr><td align="left" valign="top">Decision tree</td><td align="left" valign="top">max_depth=10, min_samples_split=2, and min_samples_leaf=1</td></tr><tr><td align="left" valign="top">Random forest</td><td align="left" valign="top">n_estimators 
=200, max_depth=20, min_samples_split=2, and min_samples_leaf=1</td></tr><tr><td align="left" valign="top">Gradient boosting</td><td align="left" valign="top">n_estimators=100, learning_rate=0.1, and max_depth=3</td></tr><tr><td align="left" valign="top">AdaBoost</td><td align="left" valign="top">n_estimators=50 and learning_rate=1.0</td></tr><tr><td align="left" valign="top">SVR<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">C=1.0, kernel=&#x201C;rbf,&#x201D; and gamma=&#x201C;scale&#x201D;</td></tr><tr><td align="left" valign="top">K-nearest neighbors</td><td align="left" valign="top">n_neighbors=5 and weights=&#x201C;uniform&#x201D;</td></tr><tr><td align="left" valign="top">Neural network</td><td align="left" valign="top">hidden_layer_sizes=(100), activation=&#x201C;relu,&#x201D; solver=&#x201C;adam,&#x201D; alpha=0.0001, and learning rate=0.001</td></tr><tr><td align="left" valign="top">Gaussian process regression</td><td align="left" valign="top">kernel=RBF() and alpha=1e<sup>&#x2212;10</sup></td></tr><tr><td align="left" valign="top">PLS<sup><xref ref-type="table-fn" rid="table4fn2">b</xref></sup> regression</td><td align="left" valign="top">n_components=2</td></tr><tr><td align="left" valign="top">Isotonic regression</td><td align="left" valign="top">y_min=none, y_max=none, increasing=true, and out_of_bounds=&#x201C;nan&#x201D;</td></tr><tr><td align="left" valign="top">Extreme gradient boosting</td><td align="left" valign="top">n_estimators=100, learning_rate=0.1, and max_depth=3</td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>SVR: support vector regression.</p></fn><fn id="table4fn2"><p><sup>b</sup>PLS: partial least squares.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-3"><title>Feature Importance via SHAP Analysis</title><p>The SHAP summary plot identified r_desc_PEOE6 (electronic properties) as the most influential descriptor, with a mean absolute SHAP value 23% 
higher than the next best feature (<xref ref-type="fig" rid="figure2">Figure 2</xref>).</p><p>The second and third top-ranked features were <italic>r_qp_QPlogPC16</italic> (partition coefficients) and <italic>i_desc_Sum_of_topological_distances_between_O.Cl</italic> (atom spacing), respectively. Secondary contributors included r_qp_PISA (polar surface area) and solvation indices such as <italic>r_desc_Solvation_connectivity_index_chi-4</italic>, which stabilized interactions within the polymerase active site. Lower-impact descriptors such as <italic>r_qp_FOSA</italic> (hydrophobic surface area) and <italic>r_qp_mol_MW</italic> (molecular weight) provided structural insights but contributed minimally to predictive reliability.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Shapley additive explanations (SHAP) summary plot showing the mean absolute SHAP values of molecular descriptors and their average impact on model predictions for inhibition of DNA polymerase &#x03B7; activity. Higher SHAP values indicate greater importance in predicting compound activity. The most influential descriptors include <italic>r_desc_PEOE6</italic> (electronic properties)<italic>, r_qp_QPlogPC16</italic> (partition coefficients), and <italic>i_desc_Sum_of_topological_distances_between_O.Cl</italic> (topological distances between oxygen and chlorine atoms). Secondary features such as <italic>r_qp_PISA</italic> (polar surface area) and solvation-related descriptors also contribute significantly to the model&#x2019;s predictions. 
Lower-ranked descriptors, such as <italic>r_qp_FOSA</italic> (hydrophobic surface area) and r_qp_mol_MW (molecular weight), provide additional structural insights but have less impact on activity than the top-ranked features.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v4i1e77890_fig02.png"/></fig></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>The exceptional predictive performance of ensemble methods, particularly random forest and XGBoost, underscores their suitability for modeling the complex, nonlinear relationships inherent in hpol &#x03B7; inhibition [<xref ref-type="bibr" rid="ref82">82</xref>-<xref ref-type="bibr" rid="ref92">92</xref>]. Random forest achieved near-perfect testing metrics (MSE=0.0003; <italic>R</italic>&#x00B2;=0.9998), demonstrating robust generalization through feature space partitioning and aggregation of decision trees. This finding aligns with prior studies in which ensemble methods excelled for biological datasets, such as cancer transcriptome prediction of cell survival, due to their capacity to handle high-dimensional, sparse molecular descriptors [<xref ref-type="bibr" rid="ref83">83</xref>,<xref ref-type="bibr" rid="ref85">85</xref>-<xref ref-type="bibr" rid="ref87">87</xref>]. The minimal performance gap between training and testing (&#x0394;MSE=0.0001 [%]&#x00B2;) highlights effective overfitting mitigation, a critical advantage given the multicollinearity observed in QSAR datasets. 
XGBoost&#x2019;s superior performance over neural networks (testing MSE=0.0002 vs 0.0008) further emphasizes gradient-boosted trees&#x2019; adaptability to sparse feature spaces, a finding consistent with their success in predicting protein-DNA binding affinity [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref84">84</xref>,<xref ref-type="bibr" rid="ref92">92</xref>-<xref ref-type="bibr" rid="ref96">96</xref>]. In contrast, linear models such as lasso regression (testing MSE=0.0032) revealed the necessity of regularization to manage sparsity, although at the cost of predictive accuracy&#x2014;a trade-off well documented in drug discovery applications [<xref ref-type="bibr" rid="ref84">84</xref>-<xref ref-type="bibr" rid="ref93">93</xref>].</p><p>SHAP analysis identified electronic properties (r_desc_PEOE6) as the most critical determinant of inhibition activity, with a mean absolute SHAP value 23% higher than the second-ranked descriptor. This aligns with crystallographic evidence showing that charge distribution governs ligand binding stabilization in polymerase active sites [<xref ref-type="bibr" rid="ref80">80</xref>,<xref ref-type="bibr" rid="ref97">97</xref>]. The prominence of partition coefficients (r_qp_QPlogPC16) underscores lipophilicity&#x2019;s dual role in cellular permeability and target engagement, a principle central to antiviral drug design [<xref ref-type="bibr" rid="ref98">98</xref>,<xref ref-type="bibr" rid="ref99">99</xref>]. Structural descriptors such as <italic>i_desc_Sum_of_topological_distances_between_O.Cl</italic> further emphasize steric complementarity requirements, mirroring findings in DNA polymerase &#x03B2; inhibition studies where atomic spacing dictated binding specificity [<xref ref-type="bibr" rid="ref82">82</xref>,<xref ref-type="bibr" rid="ref100">100</xref>-<xref ref-type="bibr" rid="ref102">102</xref>]. 
Secondary features, including polar surface area (<italic>r_qp_PISA</italic>) and solvation indices (<italic>r_desc_Solvation_connectivity_index_chi-4</italic>) [<xref ref-type="bibr" rid="ref83">83</xref>], elucidate how compounds stabilize aqueous-phase interactions, consistent with enzyme-substrate kinetic models [<xref ref-type="bibr" rid="ref103">103</xref>-<xref ref-type="bibr" rid="ref105">105</xref>]. While lower-impact descriptors (<italic>r_qp_FOSA, r_qp_mol_MW</italic>) provided auxiliary structural insights, their minimal contributions suggest prioritization of electronic and topological optimization in rational drug design [<xref ref-type="bibr" rid="ref106">106</xref>,<xref ref-type="bibr" rid="ref107">107</xref>].</p><p>The models&#x2019; consistent error distribution (MAPE: 0.89%&#x2010;5.1%) across activity ranges indicates reliability for moderate-activity compounds but exposes limitations in predicting extreme potencies. This mirrors similar challenges observed in solubility modeling, where outlier compounds often defy linear or ensemble-based predictions [<xref ref-type="bibr" rid="ref108">108</xref>,<xref ref-type="bibr" rid="ref109">109</xref>]. The clustering of MedAE around 0.01 suggests that while the models capture general trends, they struggle with highly potent inhibitors&#x2014;a critical gap in drug discovery pipelines. This limitation likely stems from insufficient representation of extreme-activity compounds in training data, a common issue for biochemical datasets. Future work can address this limitation through synthetic minority oversampling or adversarial training techniques.</p><p>Methodologically, the integration of SHAP values bridges the interpretability-accuracy divide. 
While simpler models such as linear regression underperformed by 2 orders of magnitude, SHAP&#x2019;s ability to deconvolute feature contributions enables actionable insights without sacrificing predictive power [<xref ref-type="bibr" rid="ref82">82</xref>,<xref ref-type="bibr" rid="ref83">83</xref>,<xref ref-type="bibr" rid="ref110">110</xref>]. For instance, the identification of <italic>r_desc_PEOE6</italic> as a top predictor provides a direct optimization target for medicinal chemists: tuning electronic properties to enhance binding affinity. Similarly, <italic>r_qp_QPlogPC16</italic>&#x2019;s influence offers a pathway to balancing lipophilicity and solubility&#x2014;a strategy validated in recent hpol &#x03B7; inhibitor development [<xref ref-type="bibr" rid="ref83">83</xref>]. Integrating molecular-dynamic simulations may enhance predictive accuracy for structurally flexible compounds.</p><p>While our models emphasize solvation indices, Salgado et al [<xref ref-type="bibr" rid="ref111">111</xref>] prioritized hydrogen-bonding descriptors in their polymerase inhibition studies. Discrepancies between these approaches may reflect hpol &#x03B7;&#x2019;s uniquely hydrophobic active site, suggesting the need for crystallographic validation of descriptor-activity relationships. Conversely, consistency with the solvation models by Gupta et al [<xref ref-type="bibr" rid="ref112">112</xref>] emphasizes the importance of aqueous-interaction stabilization in enzyme kinetics [<xref ref-type="bibr" rid="ref104">104</xref>,<xref ref-type="bibr" rid="ref113">113</xref>-<xref ref-type="bibr" rid="ref115">115</xref>]. Such contrasts highlight the critical role of target-specific descriptor selection in QSAR workflows.</p><p>Translating these findings into drug discovery requires balancing multiparameter optimization. 
For example, improving <italic>r_desc_PEOE6</italic> (electronic distribution) might conflict with <italic>r_qp_QPlogPC16</italic> (lipophilicity) adjustments, necessitating Pareto front analysis to identify optimal compound profiles. Additionally, the moderate impact of <italic>r_qp_QPlogHERG</italic> (cardiac toxicity risk) implies the necessity for parallel absorption, distribution, metabolism, excretion, and toxicity profiling during lead optimization&#x2014;a practice increasingly adopted in computational drug design.</p><p>This study establishes a predictive framework for hpol &#x03B7; inhibitors by combining ensemble methods (for accuracy) and SHAP analysis (for interpretability). The models prioritize electronic distribution, topological alignment, and solvation properties as critical descriptors, directly guiding rational drug design. The integration of these features underscores the need for multidimensional optimization in QSAR workflows, aligning with modern computational approaches.</p></sec><sec id="s4-2"><title>Limitations and Future Directions</title><p>This study faced challenges in accurately predicting extreme values, highlighting the need for improved methodologies to address outlier prediction. Additionally, the absence of 3D conformational data limits the ability to model dynamic molecular interactions, which is crucial for capturing the full spectrum of polymerase-targeted binding events. Incorporating such structural information in future models will enhance the realism and predictive power of dynamic interaction analyses.</p><p>While SHAP analysis effectively identifies key molecular features, mechanistic interpretations, such as the role of r_desc_PEOE6 in binding pocket interactions, require validation through molecular dynamics simulations. 
This integration will strengthen the biological relevance of feature importance findings [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref5">5</xref>].</p><p>Furthermore, the current model&#x2019;s applicability domain does not extend to metalloenzyme inhibitors, despite structural similarities among DNA polymerases. Expanding the training set to include these compounds could improve model generalizability and utility across a broader range of enzyme targets. Finally, future studies should explore hybrid modeling architectures that combine ensemble learning methods with graph neural networks. Such approaches may better capture both topological and electronic molecular effects, thereby refining QSAR methodologies for diverse enzyme systems. A key limitation of this study is the relatively small sample size compared to the large number of descriptors, which can increase the risk of overfitting despite the application of robust feature selection, regularization, and algorithmic strategies. Although our SHAP analysis identifies key molecular features, this study does not systematically assess pairs of structurally similar compounds with divergent activities, which is essential for fully evaluating model reliability and understanding potential activity cliffs. Addressing this limitation through focused analyses and validation in future studies will enhance the robustness and interpretability of our QSAR models. While our study claims superiority over linear models, it does not include direct comparisons with recent ML&#x2013;based QSAR (MLQSAR) approaches, such as deep learning&#x2013;based models. This limits our ability to fully contextualize our results within the broader scope of state-of-the-art MLQSAR studies. Although the high performance across our algorithms suggests model trustworthiness, future work will address this gap by benchmarking our models against advanced MLQSAR and deep learning methodologies. 
Additionally, this study evaluated model performance using only an internal 20% test split and did not include external validation with independent datasets or prospective testing. This limits the ability to fully assess the generalizability and real-world applicability of the models. Future work will incorporate validation using independent external datasets and prospective testing to rigorously evaluate model robustness, confirm generalizability, and strengthen confidence in their predictive performance in practical applications. Finally, critical toxicity descriptors (eg, r_qp_QPlogHERG) were identified but not optimized in this study. Future work will optimize these key toxicity descriptors to strengthen absorption, distribution, metabolism, excretion, and toxicity profiling and predictive safety. The current models show reduced performance for outliers and extreme inhibition values, which may impact predictive reliability. Future work will explore strategies such as data augmentation, robust loss functions, and uncertainty estimation to improve the models&#x2019; resilience to extreme values and enhance prediction accuracy across the full activity range. While this study achieves strong predictive performance, there is a lack of explicit analysis of the model&#x2019;s applicability domain and chemical space coverage for novel ITBA analogs. Without thorough assessment of the regions in descriptor space where the model is most reliable, the generalizability to structurally diverse or previously unseen compounds remains uncertain. To address this, future work will incorporate formal applicability domain evaluation, such as leverage and distance-based techniques, to more precisely define the confidence boundaries of predictions and ensure robust extrapolation to new ITBA scaffolds and analogs. 
This will strengthen the practical utility of the model for prospective inhibitor discovery and design.</p></sec><sec id="s4-3"><title>Conclusions</title><p>The ML-driven QSAR framework presented in this study overcomes cisplatin resistance challenges by identifying hpol &#x03B7; inhibitors with unprecedented precision. Ensemble methods (random forest and XGBoost) outperformed traditional models, capturing nonlinear relationships between molecular features and activity. SHAP analysis prioritized electronic distribution (<italic>r_desc_PEOE6</italic>), lipophilicity (<italic>r_qp_QPlogPC16</italic>), and structural topology (<italic>i_desc_Sum_of_topological_distances_between_O.Cl</italic>) as critical for efficacy, consistent with biochemical binding principles. While limitations persist in predicting extreme-potency compounds, the study provides actionable strategies to optimize inhibitor design. Future integration of dynamic 4D descriptors, experimental validation, and generative AI could accelerate development of next-generation therapies, revitalizing cisplatin-based treatments for resistant cancers through computationally guided precision.</p></sec></sec></body><back><ack><p>The authors thank Prof Wilma Sue T Griffin, Prof Steven W Barger, and Prof Peter A Crooks from the University of Arkansas for Medical Sciences for their training and funding support. The authors also thank President Michael A Fitts, Provost Robin Forman, and Dean Thomas LaVeist of the Celia Scott Weatherhead School of Public Health and Tropical Medicine at Tulane University for their exceptional support to SK for the article processing fee, made possible through start-up funds. This work was supported by grants (VA Merit 2 I01 BX001655 and Senior Research Career Scientist Award) awarded to RJSR from the United States Department of Veterans Affairs and by Program Project Grant 2P01AG012411-17A1 (Wilma Sue Tilton Griffin, PI) from the National Institute on Aging (NIA/NIH). 
The authors also acknowledge the Windgate Foundation and the Philip R. Jonsson Foundation for additional support. Support to SK was provided by the Arkansas INBRE program, funded by grant P20 GM103429 from the National Institute of General Medical Sciences, National Institutes of Health.</p></ack><notes><sec><title>Data Availability</title><p>The datasets generated and analyzed during this study are not publicly available due to ongoing intellectual property applications but are available from the corresponding author on reasonable request. The molecular database used for quantitative structure-activity relationship modeling is included in the supplementary material.</p></sec></notes><fn-group><fn fn-type="con"><p>SK, RJSR, and CMC designed the study. SK designed and implemented the machine learning&#x2013;driven quantitative structure-activity relationship workflow presented in the study. SK performed all statistical and machine learning analysis with input from RJSR. The manuscript was written by SK, with additional contributions from SA, CMC, RJSR, AYC, and KEA.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">ADMET</term><def><p>absorption, distribution, metabolism, excretion, and toxicity</p></def></def-item><def-item><term id="abb2">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb3">CCC</term><def><p>concordance correlation coefficient</p></def></def-item><def-item><term id="abb4">hpol &#x03B7;</term><def><p>human DNA polymerase &#x03B7;</p></def></def-item><def-item><term id="abb5">ITBA</term><def><p>indole thio-barbituric acid</p></def></def-item><def-item><term id="abb6">KNN</term><def><p>K-nearest neighbor</p></def></def-item><def-item><term id="abb7">LightGBM</term><def><p>light gradient boosting machines</p></def></def-item><def-item><term id="abb8">MAE</term><def><p>mean absolute error</p></def></def-item><def-item><term 
id="abb9">MAPE</term><def><p>mean absolute percentage error</p></def></def-item><def-item><term id="abb10">MedAE</term><def><p>median absolute error</p></def></def-item><def-item><term id="abb11">ML</term><def><p>machine learning</p></def></def-item><def-item><term id="abb12">MLQSAR</term><def><p>machine learning&#x2013;based quantitative structure-activity relationship</p></def></def-item><def-item><term id="abb13">MSE</term><def><p>mean square error</p></def></def-item><def-item><term id="abb14">NMSE</term><def><p>normalized mean square error</p></def></def-item><def-item><term id="abb15">NRMSE</term><def><p>normalized root mean square error</p></def></def-item><def-item><term id="abb16">PCC</term><def><p>Pearson correlation coefficient</p></def></def-item><def-item><term id="abb17">QSAR</term><def><p>quantitative structure-activity relationship</p></def></def-item><def-item><term id="abb18">RMSE</term><def><p>root mean square error</p></def></def-item><def-item><term id="abb19">RMSLE</term><def><p>root mean squared logarithmic error</p></def></def-item><def-item><term id="abb20">SHAP</term><def><p>Shapley additive explanations</p></def></def-item><def-item><term id="abb21">SMAPE</term><def><p>symmetric mean absolute percentage error</p></def></def-item><def-item><term id="abb22">SMILES</term><def><p>Simplified Molecular Input Line Entry System</p></def></def-item><def-item><term id="abb23">TLS</term><def><p>translesion DNA synthesis</p></def></def-item><def-item><term id="abb24">XGBoost</term><def><p>extreme gradient boosting</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>C</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>C</given-names> </name><name name-style="western"><surname>Gao</surname><given-names>X</given-names> 
</name><name name-style="western"><surname>Yao</surname><given-names>Q</given-names> </name></person-group><article-title>Platinum-based drugs for cancer therapy and anti-tumor strategies</article-title><source>Theranostics</source><year>2022</year><volume>12</volume><issue>5</issue><fpage>2115</fpage><lpage>2132</lpage><pub-id pub-id-type="doi">10.7150/thno.69424</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Khan</surname><given-names>SU</given-names> </name><name name-style="western"><surname>Fatima</surname><given-names>K</given-names> </name><name name-style="western"><surname>Aisha</surname><given-names>S</given-names> </name><name name-style="western"><surname>Malik</surname><given-names>F</given-names> </name></person-group><article-title>Unveiling the mechanisms and challenges of cancer drug resistance</article-title><source>Cell Commun Signal</source><year>2024</year><month>02</month><day>12</day><volume>22</volume><issue>1</issue><fpage>109</fpage><pub-id pub-id-type="doi">10.1186/s12964-023-01302-1</pub-id><pub-id pub-id-type="medline">38347575</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sahoo</surname><given-names>D</given-names> </name><name name-style="western"><surname>Deb</surname><given-names>P</given-names> </name><name name-style="western"><surname>Basu</surname><given-names>T</given-names> </name><name name-style="western"><surname>Bardhan</surname><given-names>S</given-names> </name><name name-style="western"><surname>Patra</surname><given-names>S</given-names> </name><name name-style="western"><surname>Sukul</surname><given-names>PK</given-names> </name></person-group><article-title>Advancements in platinum-based anticancer drug development: a comprehensive review of strategies, 
discoveries, and future perspectives</article-title><source>Bioorg Med Chem</source><year>2024</year><month>10</month><volume>112</volume><fpage>117894</fpage><pub-id pub-id-type="doi">10.1016/j.bmc.2024.117894</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jin</surname><given-names>SK</given-names> </name><name name-style="western"><surname>Baek</surname><given-names>KH</given-names> </name></person-group><article-title>Unraveling the role of deubiquitinating enzymes on cisplatin resistance in several cancers</article-title><source>Biochim Biophys Acta Rev Cancer</source><year>2025</year><month>04</month><volume>1880</volume><issue>2</issue><fpage>189297</fpage><pub-id pub-id-type="doi">10.1016/j.bbcan.2025.189297</pub-id><pub-id pub-id-type="medline">40058507</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhong</surname><given-names>L</given-names> </name><name name-style="western"><surname>Li</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Xiong</surname><given-names>L</given-names> </name><etal/></person-group><article-title>Small molecules in targeted cancer therapy: advances, challenges, and future perspectives</article-title><source>Sig Transduct Target Ther</source><year>2021</year><volume>6</volume><issue>1</issue><pub-id pub-id-type="doi">10.1038/s41392-021-00572-w</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dasari</surname><given-names>S</given-names> </name><name name-style="western"><surname>Tchounwou</surname><given-names>PB</given-names> </name></person-group><article-title>Cisplatin in cancer therapy: molecular mechanisms of 
action</article-title><source>Eur J Pharmacol</source><year>2014</year><month>10</month><day>5</day><volume>740</volume><fpage>364</fpage><lpage>378</lpage><pub-id pub-id-type="doi">10.1016/j.ejphar.2014.07.025</pub-id><pub-id pub-id-type="medline">25058905</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Anand</surname><given-names>J</given-names> </name><name name-style="western"><surname>Chiou</surname><given-names>L</given-names> </name><name name-style="western"><surname>Sciandra</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Roles of trans-lesion synthesis (TLS) DNA polymerases in tumorigenesis and cancer therapy</article-title><source>NAR Cancer</source><year>2023</year><month>03</month><volume>5</volume><issue>1</issue><fpage>zcad005</fpage><pub-id pub-id-type="doi">10.1093/narcan/zcad005</pub-id><pub-id pub-id-type="medline">36755961</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>LY</given-names> </name><name name-style="western"><surname>Guan</surname><given-names>YD</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>XS</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>JM</given-names> </name><name name-style="western"><surname>Cheng</surname><given-names>Y</given-names> </name></person-group><article-title>DNA repair pathways in cancer therapy and resistance</article-title><source>Front Pharmacol</source><year>2021</year><volume>11</volume><pub-id pub-id-type="doi">10.3389/fphar.2020.629266</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Maiorano</surname><given-names>D</given-names> </name><name name-style="western"><surname>El Etri</surname><given-names>J</given-names> </name><name name-style="western"><surname>Franchet</surname><given-names>C</given-names> </name><name name-style="western"><surname>Hoffmann</surname><given-names>JS</given-names> </name></person-group><article-title>Translesion synthesis or repair by specialized DNA polymerases limits excessive genomic instability upon replication stress</article-title><source>Int J Mol Sci</source><year>2021</year><month>04</month><day>10</day><volume>22</volume><issue>8</issue><fpage>3924</fpage><pub-id pub-id-type="doi">10.3390/ijms22083924</pub-id><pub-id pub-id-type="medline">33920223</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nayak</surname><given-names>S</given-names> </name><name name-style="western"><surname>Calvo</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Cantor</surname><given-names>SB</given-names> </name></person-group><article-title>Targeting translesion synthesis (TLS) to expose replication gaps, a unique cancer vulnerability</article-title><source>Expert Opin Ther Targets</source><year>2021</year><month>01</month><volume>25</volume><issue>1</issue><fpage>27</fpage><lpage>36</lpage><pub-id pub-id-type="doi">10.1080/14728222.2021.1864321</pub-id><pub-id pub-id-type="medline">33416413</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Saha</surname><given-names>P</given-names> </name><name name-style="western"><surname>Mandal</surname><given-names>T</given-names> </name><name name-style="western"><surname>Talukdar</surname><given-names>AD</given-names> </name><etal/></person-group><article-title>DNA polymerase 
eta: a potential pharmacological target for cancer therapy</article-title><source>J Cell Physiol</source><year>2021</year><month>06</month><volume>236</volume><issue>6</issue><fpage>4106</fpage><lpage>4120</lpage><pub-id pub-id-type="doi">10.1002/jcp.30155</pub-id><pub-id pub-id-type="medline">33184862</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Berdis</surname><given-names>AJ</given-names> </name></person-group><article-title>Inhibiting DNA polymerases as a therapeutic intervention against cancer</article-title><source>Front Mol Biosci</source><year>2017</year><volume>4</volume><issue>NOV</issue><fpage>78</fpage><pub-id pub-id-type="doi">10.3389/fmolb.2017.00078</pub-id><pub-id pub-id-type="medline">29201867</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tomar</surname><given-names>R</given-names> </name><name name-style="western"><surname>Li</surname><given-names>S</given-names> </name><name name-style="western"><surname>Egli</surname><given-names>M</given-names> </name><name name-style="western"><surname>Stone</surname><given-names>MP</given-names> </name></person-group><article-title>Replication bypass of the N-(2-deoxy-d-erythro-pentofuranosyl)-urea DNA lesion by human DNA polymerase &#x03B7;</article-title><source>Biochemistry</source><year>2024</year><month>03</month><day>19</day><volume>63</volume><issue>6</issue><fpage>754</fpage><lpage>766</lpage><pub-id pub-id-type="doi">10.1021/acs.biochem.3c00569</pub-id><pub-id pub-id-type="medline">38413007</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zafar</surname><given-names>MK</given-names> </name><name 
name-style="western"><surname>Maddukuri</surname><given-names>L</given-names> </name><name name-style="western"><surname>Ketkar</surname><given-names>A</given-names> </name><etal/></person-group><article-title>A small-molecule inhibitor of human DNA polymerase &#x03B7; potentiates the effects of cisplatin in tumor cells</article-title><source>Biochemistry</source><year>2018</year><month>02</month><day>20</day><volume>57</volume><issue>7</issue><fpage>1262</fpage><lpage>1273</lpage><pub-id pub-id-type="doi">10.1021/acs.biochem.7b01176</pub-id><pub-id pub-id-type="medline">29345908</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kakraba</surname><given-names>S</given-names> </name><name name-style="western"><surname>Knisley</surname><given-names>D</given-names> </name></person-group><article-title>A graph-theoretic model of single point mutations in the cystic fibrosis transmembrane conductance regulator</article-title><source>JBT</source><year>2016</year><volume>6</volume><issue>1</issue><fpage>780</fpage><lpage>786</lpage><pub-id pub-id-type="doi">10.24297/jbt.v6i1.4013</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Kakraba</surname><given-names>S</given-names> </name></person-group><source>Drugs That Protect against Protein Aggregation in Neurodegenerative Diseases</source><year>2021</year><publisher-name>University of Arkansas at Little Rock and University of Arkansas for Medical Sciences</publisher-name><comment><ext-link ext-link-type="uri" 
xlink:href="https://login.tulane.idm.oclc.org/login?url=https://www.proquest.com/dissertations-theses/drugs-that-protect-against-protein-aggregation/docview/2569992650/se-2">https://login.tulane.idm.oclc.org/login?url=https://www.proquest.com/dissertations-theses/drugs-that-protect-against-protein-aggregation/docview/2569992650/se-2</ext-link></comment></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Kakraba</surname><given-names>S</given-names> </name></person-group><source>A Hierarchical Graph for Nucleotide Binding Domain 2</source><year>2015</year><access-date>2025-04-23</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://dc.etsu.edu/etd/2517">https://dc.etsu.edu/etd/2517</ext-link></comment></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Netsey</surname><given-names>EK</given-names> </name><name name-style="western"><surname>Kakraba</surname><given-names>DS</given-names> </name><name name-style="western"><surname>Naandam</surname><given-names>SM</given-names> </name><name name-style="western"><surname>Yadem</surname><given-names>AC</given-names> </name><name name-style="western"><surname>Kakraba</surname><given-names>DS</given-names> </name></person-group><article-title>A mathematical graph-theoretic model of single point mutations associated with sickle cell anemia disease</article-title><source>J Adv Biotechnol</source><year>2021</year><volume>9</volume><fpage>1</fpage><lpage>14</lpage><pub-id pub-id-type="doi">10.24297/jbt.v9i.9109</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Knisley</surname><given-names>DJ</given-names> </name><name 
name-style="western"><surname>Knisley</surname><given-names>JR</given-names> </name></person-group><article-title>Seeing the results of a mutation with a vertex weighted hierarchical graph</article-title><source>BMC Proc</source><year>2014</year><month>08</month><volume>8</volume><issue>S2</issue><fpage>1</fpage><lpage>8</lpage><pub-id pub-id-type="doi">10.1186/1753-6561-8-S2-S7</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Knisley</surname><given-names>DJ</given-names> </name><name name-style="western"><surname>Knisley</surname><given-names>JR</given-names> </name><name name-style="western"><surname>Herron</surname><given-names>AC</given-names> </name></person-group><article-title>Graph-theoretic models of mutations in the nucleotide binding domain 1 of the cystic fibrosis transmembrane conductance regulator</article-title><source>Comput Biol J</source><year>2013</year><month>04</month><day>3</day><volume>2013</volume><fpage>1</fpage><lpage>9</lpage><pub-id pub-id-type="doi">10.1155/2013/938169</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Balasubramaniam</surname><given-names>M</given-names> </name><name name-style="western"><surname>Ayyadevara</surname><given-names>S</given-names> </name><name name-style="western"><surname>Ganne</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Aggregate interactome based on protein cross-linking interfaces predicts drug targets to limit aggregation in neurodegenerative diseases</article-title><source>iScience</source><year>2019</year><month>10</month><day>25</day><volume>20</volume><fpage>248</fpage><lpage>264</lpage><pub-id pub-id-type="doi">10.1016/j.isci.2019.09.026</pub-id><pub-id 
pub-id-type="medline">31593839</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Netsey</surname><given-names>EK</given-names> </name><name name-style="western"><surname>Naandam</surname><given-names>SM</given-names> </name><name name-style="western"><surname>Asante</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Structural and functional impacts of SARS-CoV-2 spike protein mutations: insights from predictive modeling and analytics</article-title><source>JMIR Bioinform Biotechnol (Forthcoming)</source><year>2025</year><month>03</month><day>8</day><access-date>2025-11-04</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://preprints.jmir.org/preprint/73637">https://preprints.jmir.org/preprint/73637</ext-link></comment></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yang</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Zhou</surname><given-names>H</given-names> </name><name name-style="western"><surname>Srivastav</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Optimizing Parkinson&#x2019;s disease prediction: a comparative analysis of data aggregation methods using multiple voice recordings via an automated artificial intelligence pipeline</article-title><source>Data (Basel)</source><year>2025</year><volume>10</volume><issue>1</issue><fpage>4</fpage><pub-id pub-id-type="doi">10.3390/data10010004</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Kakraba</surname><given-names>S</given-names> </name><name name-style="western"><surname>Yadem</surname><given-names>AC</given-names> 
</name><name name-style="western"><surname>Abraham</surname><given-names>KE</given-names> </name></person-group><article-title>Unraveling protein secrets: machine learning unveils novel biologically significant associations among amino acids</article-title><source>Computer Science and Mathematics</source><comment>Preprint posted online on  May 3, 2025</comment><pub-id pub-id-type="doi">10.20944/preprints202505.0139.v1</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Soares</surname><given-names>TA</given-names> </name><name name-style="western"><surname>Nunes-Alves</surname><given-names>A</given-names> </name><name name-style="western"><surname>Mazzolari</surname><given-names>A</given-names> </name><name name-style="western"><surname>Ruggiu</surname><given-names>F</given-names> </name><name name-style="western"><surname>Wei</surname><given-names>GW</given-names> </name><name name-style="western"><surname>Merz</surname><given-names>K</given-names> </name></person-group><article-title>The (re)-evolution of quantitative structure-activity relationship (QSAR) studies propelled by the surge of machine learning methods</article-title><source>J Chem Inf Model</source><year>2022</year><month>11</month><day>28</day><volume>62</volume><issue>22</issue><fpage>5317</fpage><lpage>5320</lpage><pub-id pub-id-type="doi">10.1021/acs.jcim.2c01422</pub-id><pub-id pub-id-type="medline">36437763</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ocana</surname><given-names>A</given-names> </name><name name-style="western"><surname>Pandiella</surname><given-names>A</given-names> </name><name name-style="western"><surname>Privat</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Integrating artificial 
intelligence in drug discovery and early drug development: a transformative approach</article-title><source>Biomark Res</source><year>2025</year><month>03</month><day>14</day><volume>13</volume><issue>1</issue><fpage>45</fpage><pub-id pub-id-type="doi">10.1186/s40364-025-00758-2</pub-id><pub-id pub-id-type="medline">40087789</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Odugbemi</surname><given-names>AI</given-names> </name><name name-style="western"><surname>Nyirenda</surname><given-names>C</given-names> </name><name name-style="western"><surname>Christoffels</surname><given-names>A</given-names> </name><name name-style="western"><surname>Egieyeh</surname><given-names>SA</given-names> </name></person-group><article-title>Artificial intelligence in antidiabetic drug discovery: the advances in QSAR and the prediction of &#x03B1;-glucosidase inhibitors</article-title><source>Comput Struct Biotechnol J</source><year>2024</year><month>12</month><volume>23</volume><fpage>2964</fpage><lpage>2977</lpage><pub-id pub-id-type="doi">10.1016/j.csbj.2024.07.003</pub-id><pub-id pub-id-type="medline">39148608</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Schneider</surname><given-names>A</given-names> </name><name name-style="western"><surname>Hommel</surname><given-names>G</given-names> </name><name name-style="western"><surname>Blettner</surname><given-names>M</given-names> </name></person-group><article-title>Linear regression analysis</article-title><source>Dtsch Arztebl Int</source><year>2010</year><volume>107</volume><issue>44</issue><fpage>776</fpage><lpage>782</lpage><pub-id pub-id-type="doi">10.3238/arztebl.2010.0776</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jarantow</surname><given-names>SW</given-names> </name><name name-style="western"><surname>Pisors</surname><given-names>ED</given-names> </name><name name-style="western"><surname>Chiu</surname><given-names>ML</given-names> </name></person-group><article-title>Introduction to the use of linear and nonlinear regression analysis in quantitative biological assays</article-title><source>Curr Protoc</source><year>2023</year><month>06</month><volume>3</volume><issue>6</issue><fpage>e801</fpage><pub-id pub-id-type="doi">10.1002/cpz1.801</pub-id><pub-id pub-id-type="medline">37358238</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Schreiber-Gregory</surname><given-names>DN</given-names> </name></person-group><article-title>Ridge regression and multicollinearity: an in-depth review</article-title><source>Model Assist Stat Appl</source><year>2018</year><volume>13</volume><issue>4</issue><fpage>359</fpage><lpage>365</lpage><pub-id pub-id-type="doi">10.3233/MAS-180446</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rubin</surname><given-names>J</given-names> </name><name name-style="western"><surname>Mariani</surname><given-names>L</given-names> </name><name name-style="western"><surname>Smith</surname><given-names>A</given-names> </name><name name-style="western"><surname>Zee</surname><given-names>J</given-names> </name></person-group><article-title>Ridge regression for functional form identification of continuous predictors of clinical outcomes in glomerular disease</article-title><source>Glomerular Dis</source><year>2023</year><volume>3</volume><issue>1</issue><fpage>47</fpage><lpage>55</lpage><pub-id 
pub-id-type="doi">10.1159/000528847</pub-id><pub-id pub-id-type="medline">37113495</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ranstam</surname><given-names>J</given-names> </name><name name-style="western"><surname>Cook</surname><given-names>JA</given-names> </name></person-group><article-title>LASSO regression</article-title><source>Br J Surg</source><year>2018</year><month>08</month><day>7</day><volume>105</volume><issue>10</issue><fpage>1348</fpage><lpage>1348</lpage><pub-id pub-id-type="doi">10.1002/bjs.10895</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Lu</surname><given-names>F</given-names> </name><name name-style="western"><surname>Yin</surname><given-names>Y</given-names> </name></person-group><article-title>Applying logistic LASSO regression for the diagnosis of atypical Crohn&#x2019;s disease</article-title><source>Sci Rep</source><year>2022</year><volume>12</volume><issue>1</issue><fpage>11340</fpage><pub-id pub-id-type="doi">10.1038/s41598-022-15609-5</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hong</surname><given-names>C</given-names> </name><name name-style="western"><surname>Xiong</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Xia</surname><given-names>J</given-names> </name><etal/></person-group><article-title>LASSO-based identification of risk factors and development of a prediction model for sepsis patients</article-title><source>Ther Clin Risk Manag</source><year>2024</year><volume>20</volume><fpage>47</fpage><lpage>58</lpage><pub-id 
pub-id-type="doi">10.2147/TCRM.S434397</pub-id><pub-id pub-id-type="medline">38344194</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Freijeiro&#x2010;Gonz&#x00E1;lez</surname><given-names>L</given-names> </name><name name-style="western"><surname>Febrero&#x2010;Bande</surname><given-names>M</given-names> </name><name name-style="western"><surname>Gonz&#x00E1;lez&#x2010;Manteiga</surname><given-names>W</given-names> </name></person-group><article-title>A critical review of LASSO and its derivatives for variable selection under dependence among covariates</article-title><source>Int Statistical Rev</source><year>2022</year><month>04</month><volume>90</volume><issue>1</issue><fpage>118</fpage><lpage>145</lpage><pub-id pub-id-type="doi">10.1111/insr.12469</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Delong</surname><given-names>&#x0141;</given-names> </name><name name-style="western"><surname>V W&#x00FC;thrich</surname><given-names>M</given-names> </name></person-group><article-title>Isotonic regression for variance estimation and its role in mean estimation and model validation</article-title><source>N Am Actuar J</source><year>2025</year><month>07</month><day>3</day><volume>29</volume><issue>3</issue><fpage>563</fpage><lpage>591</lpage><pub-id pub-id-type="doi">10.1080/10920277.2024.2421221</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Deng</surname><given-names>H</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>CH</given-names> </name></person-group><article-title>Isotonic regression in multi-dimensional spaces and 
graphs</article-title><source>Ann Statist</source><year>2020</year><volume>48</volume><issue>6</issue><fpage>3672</fpage><lpage>3698</lpage><pub-id pub-id-type="doi">10.1214/20-AOS1947</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>C</given-names> </name><name name-style="western"><surname>Cao</surname><given-names>X</given-names> </name><name name-style="western"><surname>Tian</surname><given-names>L</given-names> </name></person-group><article-title>Partial least squares regression performs well in MRI-based individualized estimations</article-title><source>Front Neurosci</source><year>2019</year><volume>13</volume><fpage>1282</fpage><pub-id pub-id-type="doi">10.3389/fnins.2019.01282</pub-id><pub-id pub-id-type="medline">31827420</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Vicente-Gonzalez</surname><given-names>L</given-names></name><name name-style="western"><surname>Vicente-Villardon</surname><given-names>JL</given-names> </name></person-group><article-title>Partial least squares regression for binary responses and its associated biplot representation</article-title><source>Mathematics</source><year>2022</year><volume>10</volume><issue>15</issue><fpage>2580</fpage><pub-id pub-id-type="doi">10.3390/math10152580</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>J</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Hron</surname><given-names>K</given-names> </name></person-group><article-title>Partial least squares regression with 
compositional response variables and covariates</article-title><source>J Appl Stat</source><year>2021</year><month>12</month><day>10</day><volume>48</volume><issue>16</issue><fpage>3130</fpage><lpage>3149</lpage><pub-id pub-id-type="doi">10.1080/02664763.2020.1795813</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Awad</surname><given-names>M</given-names> </name><name name-style="western"><surname>Khanna</surname><given-names>R</given-names> </name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Awad</surname><given-names>M</given-names> </name><name name-style="western"><surname>Khanna</surname><given-names>R</given-names> </name></person-group><article-title>Support vector regression</article-title><source>Efficient Learning Machines: Theories, Concepts, and Applications for Engineers and System Designers</source><year>2015</year><publisher-name>Apress</publisher-name><fpage>67</fpage><lpage>80</lpage><pub-id pub-id-type="doi">10.1007/978-1-4302-5990-9_4</pub-id><pub-id pub-id-type="other">9781430259909</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Montesinos L&#x00F3;pez</surname><given-names>OA</given-names> </name><name name-style="western"><surname>Montesinos L&#x00F3;pez</surname><given-names>A</given-names> </name><name name-style="western"><surname>Crossa</surname><given-names>J</given-names> </name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>L&#x00F3;pez</surname><given-names>OA</given-names> </name><name name-style="western"><surname>L&#x00F3;pez</surname><given-names>AM</given-names></name><name name-style="western"><surname>Crossa</surname><given-names>J</given-names> </name></person-group><article-title>Support 
vector machines and support vector regression</article-title><source>Multivariate Statistical Machine Learning Methods for Genomic Prediction</source><year>2022</year><publisher-name>Springer International Publishing</publisher-name><fpage>337</fpage><lpage>378</lpage><pub-id pub-id-type="doi">10.1007/978-3-030-89010-0_9</pub-id><pub-id pub-id-type="other">9783030890100</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zou</surname><given-names>H</given-names> </name><name name-style="western"><surname>Hastie</surname><given-names>T</given-names> </name></person-group><article-title>Regularization and variable selection via the elastic net</article-title><source>J R Stat Soc Series B</source><year>2005</year><month>04</month><day>1</day><volume>67</volume><issue>2</issue><fpage>301</fpage><lpage>320</lpage><pub-id pub-id-type="doi">10.1111/j.1467-9868.2005.00503.x</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>De Mol</surname><given-names>C</given-names> </name><name name-style="western"><surname>De Vito</surname><given-names>E</given-names> </name><name name-style="western"><surname>Rosasco</surname><given-names>L</given-names> </name></person-group><article-title>Elastic-net regularization in learning theory</article-title><source>J Complex</source><year>2009</year><month>04</month><volume>25</volume><issue>2</issue><fpage>201</fpage><lpage>230</lpage><pub-id pub-id-type="doi">10.1016/j.jco.2009.01.002</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Lai</surname><given-names>Z</given-names> 
</name><name name-style="western"><surname>Xu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Shao</surname><given-names>L</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>J</given-names> </name><name name-style="western"><surname>Xie</surname><given-names>GS</given-names> </name></person-group><article-title>Discriminative elastic-net regularized linear regression</article-title><source>IEEE Trans Image Process</source><year>2017</year><volume>26</volume><issue>3</issue><fpage>1466</fpage><lpage>1481</lpage><pub-id pub-id-type="doi">10.1109/TIP.2017.2651396</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Navada</surname><given-names>A</given-names> </name><name name-style="western"><surname>Ansari</surname><given-names>AN</given-names> </name><name name-style="western"><surname>Patil</surname><given-names>S</given-names> </name><name name-style="western"><surname>Sonkamble</surname><given-names>BA</given-names> </name></person-group><article-title>Overview of use of decision tree algorithms in machine learning</article-title><year>2011</year><conf-name>2011 IEEE Control and System Graduate Research Colloquium</conf-name><fpage>37</fpage><lpage>42</lpage><pub-id pub-id-type="doi">10.1109/ICSGRC.2011.5991826</pub-id></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Song</surname><given-names>YY</given-names> </name><name name-style="western"><surname>Lu</surname><given-names>Y</given-names> </name></person-group><article-title>Decision tree methods: applications for classification and prediction</article-title><source>Shanghai Arch 
Psychiatry</source><year>2015</year><month>04</month><day>25</day><volume>27</volume><issue>2</issue><fpage>130</fpage><lpage>135</lpage><pub-id pub-id-type="doi">10.11919/j.issn.1002-0829.215044</pub-id><pub-id pub-id-type="medline">26120265</pub-id></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mienye</surname><given-names>ID</given-names> </name><name name-style="western"><surname>Jere</surname><given-names>N</given-names> </name></person-group><article-title>A survey of decision trees: concepts, algorithms, and applications</article-title><source>IEEE Access</source><year>2024</year><volume>12</volume><fpage>86716</fpage><lpage>86727</lpage><pub-id pub-id-type="doi">10.1109/ACCESS.2024.3416838</pub-id></nlm-citation></ref><ref id="ref49"><label>49</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Breiman</surname><given-names>L</given-names> </name></person-group><article-title>Random forests</article-title><source>Mach Learn</source><year>2001</year><month>10</month><volume>45</volume><issue>1</issue><fpage>5</fpage><lpage>32</lpage><pub-id pub-id-type="doi">10.1023/A:1010933404324</pub-id></nlm-citation></ref><ref id="ref50"><label>50</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sun</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>G</given-names> </name><name name-style="western"><surname>Li</surname><given-names>P</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>H</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>M</given-names> </name><name name-style="western"><surname>Liang</surname><given-names>X</given-names> </name></person-group><article-title>An improved 
random forest based on the classification accuracy and correlation measurement of decision trees</article-title><source>Expert Syst Appl</source><year>2024</year><month>03</month><volume>237</volume><fpage>121549</fpage><pub-id pub-id-type="doi">10.1016/j.eswa.2023.121549</pub-id><pub-id pub-id-type="medline">39238945</pub-id></nlm-citation></ref><ref id="ref51"><label>51</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Cutler</surname><given-names>A</given-names> </name><name name-style="western"><surname>Cutler</surname><given-names>DR</given-names> </name><name name-style="western"><surname>Stevens</surname><given-names>JR</given-names></name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Zhang</surname><given-names>C</given-names> </name><name name-style="western"><surname>Ma</surname><given-names>Y</given-names> </name></person-group><article-title>Random forests</article-title><source>Ensemble Machine Learning: Methods and Applications</source><year>2012</year><publisher-name>Springer</publisher-name><pub-id pub-id-type="doi">10.1007/978-1-4419-9326-7_5</pub-id><pub-id pub-id-type="other">9781441993267</pub-id></nlm-citation></ref><ref id="ref52"><label>52</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Natekin</surname><given-names>A</given-names> </name><name name-style="western"><surname>Knoll</surname><given-names>A</given-names> </name></person-group><article-title>Gradient boosting machines, a tutorial</article-title><source>Front Neurorobot</source><year>2013</year><volume>7</volume><fpage>21</fpage><pub-id pub-id-type="doi">10.3389/fnbot.2013.00021</pub-id><pub-id pub-id-type="medline">24409142</pub-id></nlm-citation></ref><ref id="ref53"><label>53</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name 
name-style="western"><surname>Aziz</surname><given-names>N</given-names> </name><name name-style="western"><surname>Akhir</surname><given-names>EAP</given-names> </name><name name-style="western"><surname>Aziz</surname><given-names>IA</given-names> </name><name name-style="western"><surname>Jaafar</surname><given-names>J</given-names> </name><name name-style="western"><surname>Hasan</surname><given-names>MH</given-names> </name><name name-style="western"><surname>Abas</surname><given-names>ANC</given-names> </name></person-group><article-title>A study on gradient boosting algorithms for development of AI monitoring and prediction systems</article-title><conf-name>2020 International Conference on Computational Intelligence (ICCI)</conf-name><conf-loc>Bandar Seri Iskandar, Malaysia</conf-loc><fpage>11</fpage><lpage>16</lpage><pub-id pub-id-type="doi">10.1109/ICCI51257.2020.9247843</pub-id></nlm-citation></ref><ref id="ref54"><label>54</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Boldini</surname><given-names>D</given-names> </name><name name-style="western"><surname>Grisoni</surname><given-names>F</given-names> </name><name name-style="western"><surname>Kuhn</surname><given-names>D</given-names> </name><name name-style="western"><surname>Friedrich</surname><given-names>L</given-names> </name><name name-style="western"><surname>Sieber</surname><given-names>SA</given-names> </name></person-group><article-title>Practical guidelines for the use of gradient boosting for molecular property prediction</article-title><source>J Cheminform</source><year>2023</year><volume>15</volume><issue>1</issue><fpage>73</fpage><pub-id pub-id-type="doi">10.1186/s13321-023-00743-7</pub-id></nlm-citation></ref><ref id="ref55"><label>55</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>T</given-names> </name><name 
name-style="western"><surname>Guestrin</surname><given-names>C</given-names> </name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Krishnapuram</surname><given-names>B</given-names> </name><name name-style="western"><surname>Shah</surname><given-names>M</given-names> </name></person-group><article-title>Xgboost: a scalable tree boosting system</article-title><source>Proceedings of the 22nd Acm Sigkdd International Conference on Knowledge Discovery and Data Mining</source><year>2016</year><publisher-name>Association for Computing Machinery</publisher-name><fpage>785</fpage><lpage>794</lpage><pub-id pub-id-type="doi">10.1145/2939672.2939785</pub-id><pub-id pub-id-type="other">9781450342322</pub-id></nlm-citation></ref><ref id="ref56"><label>56</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Raihan</surname><given-names>MJ</given-names> </name><name name-style="western"><surname>Khan</surname><given-names>MAM</given-names> </name><name name-style="western"><surname>Kee</surname><given-names>SH</given-names> </name><name name-style="western"><surname>Nahid</surname><given-names>AA</given-names> </name></person-group><article-title>Detection of the chronic kidney disease using XGBoost classifier and explaining the influence of the attributes on the model using SHAP</article-title><source>Sci Rep</source><year>2023</year><month>04</month><day>17</day><volume>13</volume><issue>1</issue><fpage>6263</fpage><pub-id pub-id-type="doi">10.1038/s41598-023-33525-0</pub-id><pub-id pub-id-type="medline">37069256</pub-id></nlm-citation></ref><ref id="ref57"><label>57</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bent&#x00E9;jac</surname><given-names>C</given-names> </name><name name-style="western"><surname>Cs&#x00F6;rg&#x0151;</surname><given-names>A</given-names> </name><name 
name-style="western"><surname>Mart&#x00ED;nez-Mu&#x00F1;oz</surname><given-names>G</given-names> </name></person-group><article-title>A comparative analysis of gradient boosting algorithms</article-title><source>Artif Intell Rev</source><year>2021</year><month>03</month><volume>54</volume><issue>3</issue><fpage>1937</fpage><lpage>1967</lpage><pub-id pub-id-type="doi">10.1007/s10462-020-09896-5</pub-id></nlm-citation></ref><ref id="ref58"><label>58</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Moore</surname><given-names>A</given-names> </name><name name-style="western"><surname>Bell</surname><given-names>M</given-names> </name></person-group><article-title>XGBoost, a novel explainable AI technique, in the prediction of myocardial infarction: a UK biobank cohort study</article-title><source>Clin Med Insights Cardiol</source><year>2022</year><volume>16</volume><fpage>11795468221133611</fpage><pub-id pub-id-type="doi">10.1177/11795468221133611</pub-id><pub-id pub-id-type="medline">36386405</pub-id></nlm-citation></ref><ref id="ref59"><label>59</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Ni</surname><given-names>M</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Research and application of adaboost algorithm based on SVM</article-title><conf-name>8th Joint International Information Technology and Artificial Intelligence Conference (ITAIC)</conf-name><conf-date>May 24-26, 2019</conf-date><conf-loc>Chongqing, China</conf-loc><fpage>662</fpage><lpage>666</lpage><pub-id pub-id-type="doi">10.1109/ITAIC.2019.8785556</pub-id></nlm-citation></ref><ref id="ref60"><label>60</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>R</given-names> </name></person-group><article-title>AdaBoost for feature selection, classification and its relation with SVM, a review</article-title><source>Phys Procedia</source><year>2012</year><volume>25</volume><fpage>800</fpage><lpage>807</lpage><pub-id pub-id-type="doi">10.1016/j.phpro.2012.03.160</pub-id></nlm-citation></ref><ref id="ref61"><label>61</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hancock</surname><given-names>JT</given-names> </name><name name-style="western"><surname>Khoshgoftaar</surname><given-names>TM</given-names> </name></person-group><article-title>CatBoost for big data: an interdisciplinary review</article-title><source>J Big Data</source><year>2020</year><month>12</month><volume>7</volume><issue>1</issue><fpage>94</fpage><pub-id pub-id-type="doi">10.1186/s40537-020-00369-8</pub-id></nlm-citation></ref><ref id="ref62"><label>62</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ibrahim</surname><given-names>AA</given-names> </name><name name-style="western"><surname>Ridwan</surname><given-names>RL</given-names> </name><name name-style="western"><surname>Muhammed</surname><given-names>MM</given-names> </name><name name-style="western"><surname>Abdulaziz</surname><given-names>RO</given-names> </name><name name-style="western"><surname>Saheed</surname><given-names>GA</given-names> </name></person-group><article-title>Comparison of the CatBoost classifier with other machine learning methods</article-title><source>Int J Adv Comput Sci Appl</source><year>2020</year><volume>11</volume><issue>11</issue><fpage>738</fpage><lpage>748</lpage><pub-id pub-id-type="doi">10.14569/IJACSA.2020.0111190</pub-id></nlm-citation></ref><ref id="ref63"><label>63</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Uddin</surname><given-names>S</given-names> </name><name name-style="western"><surname>Haque</surname><given-names>I</given-names> </name><name name-style="western"><surname>Lu</surname><given-names>H</given-names> </name><name name-style="western"><surname>Moni</surname><given-names>MA</given-names> </name><name name-style="western"><surname>Gide</surname><given-names>E</given-names> </name></person-group><article-title>Comparative performance analysis of K-nearest neighbour (KNN) algorithm and its different variants for disease prediction</article-title><source>Sci Rep</source><year>2022</year><volume>12</volume><issue>1</issue><fpage>6256</fpage><pub-id pub-id-type="doi">10.1038/s41598-022-10358-x</pub-id></nlm-citation></ref><ref id="ref64"><label>64</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Halder</surname><given-names>RK</given-names> </name><name name-style="western"><surname>Uddin</surname><given-names>MN</given-names> </name><name name-style="western"><surname>Uddin</surname><given-names>MA</given-names> </name><name name-style="western"><surname>Aryal</surname><given-names>S</given-names> </name><name name-style="western"><surname>Khraisat</surname><given-names>A</given-names> </name></person-group><article-title>Enhancing K-nearest neighbor algorithm: a comprehensive review and performance analysis of modifications</article-title><source>J Big Data</source><year>2024</year><volume>11</volume><issue>1</issue><fpage>113</fpage><pub-id pub-id-type="doi">10.1186/s40537-024-00973-y</pub-id></nlm-citation></ref><ref id="ref65"><label>65</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>Z</given-names> </name></person-group><article-title>Introduction to machine learning: k-nearest neighbors</article-title><source>Ann 
Transl Med</source><year>2016</year><month>06</month><volume>4</volume><issue>11</issue><fpage>218</fpage><lpage>218</lpage><pub-id pub-id-type="doi">10.21037/atm.2016.03.37</pub-id></nlm-citation></ref><ref id="ref66"><label>66</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Guo</surname><given-names>G</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>H</given-names> </name><name name-style="western"><surname>Bell</surname><given-names>D</given-names> </name><name name-style="western"><surname>Bi</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Greer</surname><given-names>K</given-names> </name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Meersman</surname><given-names>R</given-names> </name><name name-style="western"><surname>Tari</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Schmidt</surname><given-names>DC</given-names> </name><name name-style="western"><surname>Meersman</surname><given-names>R</given-names> </name><name name-style="western"><surname>Tari</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Meersman</surname><given-names>R</given-names> </name><name name-style="western"><surname>Tari</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Schmidt</surname><given-names>DC</given-names> </name></person-group><article-title>KNN model-based approach in classification</article-title><source>On The Move to Meaningful Internet Systems 2003: CoopIS, DOA, and ODBASE</source><year>2003</year><publisher-name>Springer</publisher-name><fpage>986</fpage><lpage>996</lpage><pub-id pub-id-type="doi">10.1007/978-3-540-39964-3_62</pub-id><pub-id pub-id-type="other">9783540399643</pub-id></nlm-citation></ref><ref id="ref67"><label>67</label><nlm-citation citation-type="confproc"><person-group 
person-group-type="author"><name name-style="western"><surname>Uhrig</surname><given-names>RE</given-names> </name></person-group><article-title>Introduction to artificial neural networks</article-title><year>1995</year><conf-name>IECON &#x2019;95 - 21st Annual Conference on IEEE Industrial Electronics</conf-name><pub-id pub-id-type="doi">10.1109/IECON.1995.483329</pub-id></nlm-citation></ref><ref id="ref68"><label>68</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Han</surname><given-names>SH</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>KW</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>S</given-names> </name><name name-style="western"><surname>Youn</surname><given-names>YC</given-names> </name></person-group><article-title>Artificial neural network: understanding the basic concepts without mathematics</article-title><source>Dement Neurocognitive Disord</source><year>2018</year><volume>17</volume><issue>3</issue><fpage>83</fpage><pub-id pub-id-type="doi">10.12779/dnd.2018.17.3.83</pub-id></nlm-citation></ref><ref id="ref69"><label>69</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Schmidhuber</surname><given-names>J</given-names> </name></person-group><article-title>Deep learning in neural networks: an overview</article-title><source>Neural Netw</source><year>2015</year><month>01</month><volume>61</volume><fpage>85</fpage><lpage>117</lpage><pub-id pub-id-type="doi">10.1016/j.neunet.2014.09.003</pub-id><pub-id pub-id-type="medline">25462637</pub-id></nlm-citation></ref><ref id="ref70"><label>70</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Grossi</surname><given-names>E</given-names> </name><name name-style="western"><surname>Buscema</surname><given-names>M</given-names> 
</name></person-group><article-title>Introduction to artificial neural networks</article-title><source>Eur J Gastroenterol Hepatol</source><year>2007</year><month>12</month><volume>19</volume><issue>12</issue><fpage>1046</fpage><lpage>1054</lpage><pub-id pub-id-type="doi">10.1097/MEG.0b013e3282f198a0</pub-id></nlm-citation></ref><ref id="ref71"><label>71</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Goel</surname><given-names>A</given-names> </name><name name-style="western"><surname>Goel</surname><given-names>AK</given-names> </name><name name-style="western"><surname>Kumar</surname><given-names>A</given-names> </name></person-group><article-title>The role of artificial neural network and machine learning in utilizing spatial information</article-title><source>Spat Inf Res</source><year>2023</year><month>06</month><volume>31</volume><issue>3</issue><fpage>275</fpage><lpage>285</lpage><pub-id pub-id-type="doi">10.1007/s41324-022-00494-x</pub-id></nlm-citation></ref><ref id="ref72"><label>72</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Deringer</surname><given-names>VL</given-names> </name><name name-style="western"><surname>Bart&#x00F3;k</surname><given-names>AP</given-names> </name><name name-style="western"><surname>Bernstein</surname><given-names>N</given-names> </name><name name-style="western"><surname>Wilkins</surname><given-names>DM</given-names> </name><name name-style="western"><surname>Ceriotti</surname><given-names>M</given-names> </name><name name-style="western"><surname>Cs&#x00E1;nyi</surname><given-names>G</given-names> </name></person-group><article-title>Gaussian process regression for materials and molecules</article-title><source>Chem Rev</source><year>2021</year><month>08</month><day>25</day><volume>121</volume><issue>16</issue><fpage>10073</fpage><lpage>10141</lpage><pub-id 
pub-id-type="doi">10.1021/acs.chemrev.1c00022</pub-id><pub-id pub-id-type="medline">34398616</pub-id></nlm-citation></ref><ref id="ref73"><label>73</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Ebden</surname><given-names>M</given-names> </name></person-group><article-title>Gaussian processes: a quick introduction</article-title><comment>Preprint posted online in 2015</comment></nlm-citation></ref><ref id="ref74"><label>74</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Schulz</surname><given-names>E</given-names> </name><name name-style="western"><surname>Speekenbrink</surname><given-names>M</given-names> </name><name name-style="western"><surname>Krause</surname><given-names>A</given-names> </name></person-group><article-title>A tutorial on gaussian process regression: modelling, exploring, and exploiting functions</article-title><source>J Math Psychol</source><year>2018</year><month>08</month><volume>85</volume><fpage>1</fpage><lpage>16</lpage><pub-id pub-id-type="doi">10.1016/j.jmp.2018.03.001</pub-id></nlm-citation></ref><ref id="ref75"><label>75</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mendelsohn</surname><given-names>LD</given-names> </name></person-group><article-title>ChemDraw 8 Ultra, Windows and Macintosh versions</article-title><source>J Chem Inf Comput Sci</source><year>2004</year><month>11</month><day>1</day><volume>44</volume><issue>6</issue><fpage>2225</fpage><lpage>2226</lpage><pub-id pub-id-type="doi">10.1021/ci040123t</pub-id></nlm-citation></ref><ref id="ref76"><label>76</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sankar</surname><given-names>K</given-names> </name><name 
name-style="western"><surname>Trainor</surname><given-names>K</given-names> </name><name name-style="western"><surname>Blazer</surname><given-names>LL</given-names> </name><etal/></person-group><article-title>A descriptor set for quantitative structure-property relationship prediction in biologics</article-title><source>Mol Inform</source><year>2022</year><month>09</month><volume>41</volume><issue>9</issue><fpage>e2100240</fpage><pub-id pub-id-type="doi">10.1002/minf.202100240</pub-id><pub-id pub-id-type="medline">35277930</pub-id></nlm-citation></ref><ref id="ref77"><label>77</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sivakumar</surname><given-names>M</given-names> </name><name name-style="western"><surname>Parthasarathy</surname><given-names>S</given-names> </name><name name-style="western"><surname>Padmapriya</surname><given-names>T</given-names> </name></person-group><article-title>Trade-off between training and testing ratio in machine learning for medical image processing</article-title><source>PeerJ Comput Sci</source><year>2024</year><volume>10</volume><fpage>e2245</fpage><pub-id pub-id-type="doi">10.7717/peerj-cs.2245</pub-id></nlm-citation></ref><ref id="ref78"><label>78</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shimizu</surname><given-names>H</given-names> </name><name name-style="western"><surname>Enda</surname><given-names>K</given-names> </name><name name-style="western"><surname>Shimizu</surname><given-names>T</given-names> </name><etal/></person-group><article-title>Machine learning algorithms: prediction and feature selection for clinical refracture after surgically treated fragility fracture</article-title><source>J Clin Med</source><year>2022</year><month>04</month><day>5</day><volume>11</volume><issue>7</issue><fpage>2021</fpage><pub-id pub-id-type="doi">10.3390/jcm11072021</pub-id><pub-id 
pub-id-type="medline">35407629</pub-id></nlm-citation></ref><ref id="ref79"><label>79</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Prokhorenkova</surname><given-names>L</given-names> </name><name name-style="western"><surname>Gusev</surname><given-names>G</given-names> </name><name name-style="western"><surname>Vorobev</surname><given-names>A</given-names> </name><name name-style="western"><surname>Dorogush</surname><given-names>AV</given-names> </name><name name-style="western"><surname>Gulin</surname><given-names>A</given-names> </name></person-group><article-title>Catboost: unbiased boosting with categorical features</article-title><source>Adv Neural Inf Process Syst</source><year>2018</year><fpage>6638</fpage><lpage>6648</lpage><pub-id pub-id-type="doi">10.5555/3327757.3327770</pub-id></nlm-citation></ref><ref id="ref80"><label>80</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jierula</surname><given-names>A</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>S</given-names> </name><name name-style="western"><surname>Oh</surname><given-names>TM</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>P</given-names> </name></person-group><article-title>Study on accuracy metrics for evaluating the predictions of damage locations in deep piles using artificial neural networks with acoustic emission data</article-title><source>Appl Sci (Basel)</source><year>2021</year><volume>11</volume><issue>5</issue><fpage>2314</fpage><pub-id pub-id-type="doi">10.3390/app11052314</pub-id></nlm-citation></ref><ref id="ref81"><label>81</label><nlm-citation citation-type="report"><person-group person-group-type="author"><name name-style="western"><surname>Van Rossum</surname><given-names>G</given-names> </name><name 
name-style="western"><surname>Drake</surname><given-names>FL</given-names> </name></person-group><article-title>Python reference manual</article-title><year>2006</year><month>10</month><volume>22</volume><fpage>9117</fpage><lpage>9129</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://www.python.org/doc/">https://www.python.org/doc/</ext-link></comment></nlm-citation></ref><ref id="ref82"><label>82</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>Z</given-names> </name></person-group><article-title>Extracting spatial effects from machine learning model using local interpretation method: an example of SHAP and XGBoost</article-title><source>Comput Environ Urban Syst</source><year>2022</year><month>09</month><volume>96</volume><fpage>101845</fpage><pub-id pub-id-type="doi">10.1016/j.compenvurbsys.2022.101845</pub-id></nlm-citation></ref><ref id="ref83"><label>83</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>H</given-names> </name><name name-style="western"><surname>Liang</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Hancock</surname><given-names>JT</given-names> </name><name name-style="western"><surname>Khoshgoftaar</surname><given-names>TM</given-names> </name></person-group><article-title>Feature selection strategies: a comparative analysis of SHAP-value and importance-based methods</article-title><source>J Big Data</source><year>2024</year><volume>11</volume><issue>1</issue><fpage>1</fpage><lpage>16</lpage><pub-id pub-id-type="doi">10.1186/s40537-024-00905-w</pub-id></nlm-citation></ref><ref id="ref84"><label>84</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Khan</surname><given-names>S</given-names> </name><name 
name-style="western"><surname>Noor</surname><given-names>S</given-names> </name><name name-style="western"><surname>Javed</surname><given-names>T</given-names> </name><etal/></person-group><article-title>XGBoost-enhanced ensemble model using discriminative hybrid features for the prediction of sumoylation sites</article-title><source>BioData Min</source><year>2025</year><month>02</month><day>3</day><volume>18</volume><issue>1</issue><fpage>12</fpage><pub-id pub-id-type="doi">10.1186/s13040-024-00415-8</pub-id><pub-id pub-id-type="medline">39901279</pub-id></nlm-citation></ref><ref id="ref85"><label>85</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Vamathevan</surname><given-names>J</given-names> </name><name name-style="western"><surname>Clark</surname><given-names>D</given-names> </name><name name-style="western"><surname>Czodrowski</surname><given-names>P</given-names> </name><etal/></person-group><article-title>Applications of machine learning in drug discovery and development</article-title><source>Nat Rev Drug Discov</source><year>2019</year><month>06</month><volume>18</volume><issue>6</issue><fpage>463</fpage><lpage>477</lpage><pub-id pub-id-type="doi">10.1038/s41573-019-0024-5</pub-id><pub-id pub-id-type="medline">30976107</pub-id></nlm-citation></ref><ref id="ref86"><label>86</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>M</given-names> </name><name name-style="western"><surname>Srivastava</surname><given-names>G</given-names> </name><name name-style="western"><surname>Ramanujam</surname><given-names>J</given-names> </name><name name-style="western"><surname>Brylinski</surname><given-names>M</given-names> </name></person-group><article-title>Insights from augmented data integration and strong regularization in drug synergy prediction with SynerGNet</article-title><source>Mach Learn 
Knowl Extr</source><year>2024</year><volume>6</volume><issue>3</issue><fpage>1782</fpage><lpage>1797</lpage><pub-id pub-id-type="doi">10.3390/make6030087</pub-id></nlm-citation></ref><ref id="ref87"><label>87</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Obaido</surname><given-names>G</given-names> </name><name name-style="western"><surname>Mienye</surname><given-names>ID</given-names> </name><name name-style="western"><surname>Egbelowo</surname><given-names>OF</given-names> </name><etal/></person-group><article-title>Supervised machine learning in drug discovery and development: algorithms, applications, challenges, and prospects</article-title><source>Machine Learning with Applications</source><year>2024</year><month>09</month><volume>17</volume><fpage>100576</fpage><pub-id pub-id-type="doi">10.1016/j.mlwa.2024.100576</pub-id></nlm-citation></ref><ref id="ref88"><label>88</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sharma</surname><given-names>A</given-names> </name><name name-style="western"><surname>Lysenko</surname><given-names>A</given-names> </name><name name-style="western"><surname>Jia</surname><given-names>S</given-names> </name><name name-style="western"><surname>Boroevich</surname><given-names>KA</given-names> </name><name name-style="western"><surname>Tsunoda</surname><given-names>T</given-names> </name></person-group><article-title>Advances in AI and machine learning for predictive medicine</article-title><source>J Hum Genet</source><year>2024</year><month>10</month><volume>69</volume><issue>10</issue><fpage>487</fpage><lpage>497</lpage><pub-id pub-id-type="doi">10.1038/s10038-024-01231-y</pub-id><pub-id pub-id-type="medline">38424184</pub-id></nlm-citation></ref><ref id="ref89"><label>89</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Huang</surname><given-names>S</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>G</given-names> </name><name name-style="western"><surname>Ding</surname><given-names>J</given-names> </name><name name-style="western"><surname>Pei</surname><given-names>Q</given-names> </name></person-group><article-title>Machine learning for prediction of drug concentrations: application and challenges</article-title><source>Clin Pharmacol Ther</source><year>2025</year><month>05</month><volume>117</volume><issue>5</issue><fpage>1236</fpage><lpage>1247</lpage><pub-id pub-id-type="doi">10.1002/cpt.3577</pub-id><pub-id pub-id-type="medline">39901656</pub-id></nlm-citation></ref><ref id="ref90"><label>90</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Patel</surname><given-names>L</given-names> </name><name name-style="western"><surname>Shukla</surname><given-names>T</given-names> </name><name name-style="western"><surname>Huang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Ussery</surname><given-names>DW</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>S</given-names> </name></person-group><article-title>Machine learning methods in drug discovery</article-title><source>Molecules</source><year>2020</year><month>11</month><day>12</day><volume>25</volume><issue>22</issue><fpage>5277</fpage><pub-id pub-id-type="doi">10.3390/molecules25225277</pub-id><pub-id pub-id-type="medline">33198233</pub-id></nlm-citation></ref><ref id="ref91"><label>91</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ohnuki</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Akiyama</surname><given-names>M</given-names> 
</name><name name-style="western"><surname>Sakakibara</surname><given-names>Y</given-names> </name></person-group><article-title>Deep learning of multimodal networks with topological regularization for drug repositioning</article-title><source>J Cheminform</source><year>2024</year><month>08</month><day>23</day><volume>16</volume><issue>1</issue><fpage>103</fpage><pub-id pub-id-type="doi">10.1186/s13321-024-00897-y</pub-id><pub-id pub-id-type="medline">39180095</pub-id></nlm-citation></ref><ref id="ref92"><label>92</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ahmed</surname><given-names>NY</given-names> </name><name name-style="western"><surname>Alsanousi</surname><given-names>WA</given-names> </name><name name-style="western"><surname>Hamid</surname><given-names>EM</given-names> </name><etal/></person-group><article-title>An efficient deep learning approach for DNA-binding proteins classification from primary sequences</article-title><source>Int J Comput Intell Syst</source><year>2024</year><volume>17</volume><issue>1</issue><fpage>1</fpage><lpage>14</lpage><pub-id pub-id-type="doi">10.1007/s44196-024-00462-3</pub-id></nlm-citation></ref><ref id="ref93"><label>93</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Thedinga</surname><given-names>K</given-names> </name><name name-style="western"><surname>Herwig</surname><given-names>R</given-names> </name></person-group><article-title>A gradient tree boosting and network propagation derived pan-cancer survival network of the tumor microenvironment</article-title><source>iScience</source><year>2022</year><month>01</month><day>21</day><volume>25</volume><issue>1</issue><fpage>103617</fpage><pub-id pub-id-type="doi">10.1016/j.isci.2021.103617</pub-id><pub-id pub-id-type="medline">35106465</pub-id></nlm-citation></ref><ref id="ref94"><label>94</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Claude</surname><given-names>E</given-names> </name><name name-style="western"><surname>Leclercq</surname><given-names>M</given-names> </name><name name-style="western"><surname>Th&#x00E9;bault</surname><given-names>P</given-names> </name><name name-style="western"><surname>Droit</surname><given-names>A</given-names> </name><name name-style="western"><surname>Uricaru</surname><given-names>R</given-names> </name></person-group><article-title>Optimizing hybrid ensemble feature selection strategies for transcriptomic biomarker discovery in complex diseases</article-title><source>NAR Genomics and Bioinformatics</source><year>2024</year><month>07</month><day>2</day><volume>6</volume><issue>3</issue><fpage>79</fpage><pub-id pub-id-type="doi">10.1093/nargab/lqae079</pub-id></nlm-citation></ref><ref id="ref95"><label>95</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wu</surname><given-names>BR</given-names> </name><name name-style="western"><surname>Ormazabal Arriagada</surname><given-names>S</given-names> </name><name name-style="western"><surname>Hsu</surname><given-names>TC</given-names> </name><name name-style="western"><surname>Lin</surname><given-names>TW</given-names> </name><name name-style="western"><surname>Lin</surname><given-names>C</given-names> </name></person-group><article-title>Exploiting common patterns in diverse cancer types via multi-task learning</article-title><source>NPJ Precis Oncol</source><year>2024</year><month>10</month><day>29</day><volume>8</volume><issue>1</issue><fpage>245</fpage><pub-id pub-id-type="doi">10.1038/s41698-024-00700-z</pub-id><pub-id pub-id-type="medline">39472543</pub-id></nlm-citation></ref><ref id="ref96"><label>96</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Airlangga</surname><given-names>G</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>A</given-names> </name></person-group><article-title>A hybrid gradient boosting and neural network model for predicting urban happiness: integrating ensemble learning with deep representation for enhanced accuracy</article-title><source>Mach Learn Knowl Extr</source><year>2025</year><volume>7</volume><issue>1</issue><fpage>4</fpage><pub-id pub-id-type="doi">10.3390/make7010004</pub-id></nlm-citation></ref><ref id="ref97"><label>97</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Arora</surname><given-names>K</given-names> </name><name name-style="western"><surname>Schlick</surname><given-names>T</given-names> </name></person-group><article-title>In silico evidence for DNA polymerase-beta&#x2019;s substrate-induced conformational change</article-title><source>Biophys J</source><year>2004</year><month>11</month><volume>87</volume><issue>5</issue><fpage>3088</fpage><lpage>3099</lpage><pub-id pub-id-type="doi">10.1529/biophysj.104.040915</pub-id><pub-id pub-id-type="medline">15507687</pub-id></nlm-citation></ref><ref id="ref98"><label>98</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jonsson</surname><given-names>CB</given-names> </name><name name-style="western"><surname>Golden</surname><given-names>JE</given-names> </name><name name-style="western"><surname>Meibohm</surname><given-names>B</given-names> </name></person-group><article-title>Time to &#x201C;Mind the Gap&#x201D; in novel small molecule drug discovery for direct-acting antivirals for SARS-CoV-2</article-title><source>Curr Opin Virol</source><year>2021</year><month>10</month><volume>50</volume><fpage>1</fpage><lpage>7</lpage><pub-id pub-id-type="doi">10.1016/j.coviro.2021.06.008</pub-id><pub-id 
pub-id-type="medline">34256351</pub-id></nlm-citation></ref><ref id="ref99"><label>99</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Markowicz-Piasecka</surname><given-names>M</given-names> </name><name name-style="western"><surname>Markiewicz</surname><given-names>A</given-names> </name><name name-style="western"><surname>Dar&#x0142;ak</surname><given-names>P</given-names> </name><etal/></person-group><article-title>Current chemical, biological, and physiological views in the development of successful brain-targeted pharmaceutics</article-title><source>Neurotherapeutics</source><year>2022</year><month>04</month><volume>19</volume><issue>3</issue><fpage>942</fpage><lpage>976</lpage><pub-id pub-id-type="doi">10.1007/s13311-022-01228-5</pub-id><pub-id pub-id-type="medline">35391662</pub-id></nlm-citation></ref><ref id="ref100"><label>100</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>W</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>EY</given-names> </name><name name-style="western"><surname>Hellinga</surname><given-names>HW</given-names> </name><name name-style="western"><surname>Beese</surname><given-names>LS</given-names> </name></person-group><article-title>Structural factors that determine selectivity of a high fidelity DNA polymerase for deoxy-, dideoxy-, and ribonucleotides</article-title><source>Journal of Biological Chemistry</source><year>2012</year><month>08</month><volume>287</volume><issue>34</issue><fpage>28215</fpage><lpage>28226</lpage><pub-id pub-id-type="doi">10.1074/jbc.M112.366609</pub-id></nlm-citation></ref><ref id="ref101"><label>101</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Beard</surname><given-names>WA</given-names> </name><name 
name-style="western"><surname>Wilson</surname><given-names>SH</given-names> </name></person-group><article-title>Structure and mechanism of DNA polymerase &#x03B2;</article-title><source>Biochemistry</source><year>2014</year><month>05</month><day>6</day><volume>53</volume><issue>17</issue><fpage>2768</fpage><lpage>2780</lpage><pub-id pub-id-type="doi">10.1021/bi500139h</pub-id><pub-id pub-id-type="medline">24717170</pub-id></nlm-citation></ref><ref id="ref102"><label>102</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Batra</surname><given-names>VK</given-names> </name><name name-style="western"><surname>Beard</surname><given-names>WA</given-names> </name><name name-style="western"><surname>Shock</surname><given-names>DD</given-names> </name><name name-style="western"><surname>Pedersen</surname><given-names>LC</given-names> </name><name name-style="western"><surname>Wilson</surname><given-names>SH</given-names> </name></person-group><article-title>Structures of DNA polymerase &#x03B2; with active-site mismatches suggest a transient abasic site intermediate during misincorporation</article-title><source>Mol Cell</source><year>2008</year><month>05</month><day>9</day><volume>30</volume><issue>3</issue><fpage>315</fpage><lpage>324</lpage><pub-id pub-id-type="doi">10.1016/j.molcel.2008.02.025</pub-id><pub-id pub-id-type="medline">18471977</pub-id></nlm-citation></ref><ref id="ref103"><label>103</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mabesoone</surname><given-names>MFJ</given-names> </name><name name-style="western"><surname>Palmans</surname><given-names>ARA</given-names> </name><name name-style="western"><surname>Meijer</surname><given-names>EW</given-names> </name></person-group><article-title>Solute&#x2013;solvent interactions in modern physical organic chemistry: supramolecular polymers as a 
muse</article-title><source>J Am Chem Soc</source><year>2020</year><month>11</month><day>25</day><volume>142</volume><issue>47</issue><fpage>19781</fpage><lpage>19798</lpage><pub-id pub-id-type="doi">10.1021/jacs.0c09293</pub-id><pub-id pub-id-type="medline">33174741</pub-id></nlm-citation></ref><ref id="ref104"><label>104</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>S</given-names> </name><name name-style="western"><surname>Meng</surname><given-names>X</given-names> </name><name name-style="western"><surname>Zhou</surname><given-names>H</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Secundo</surname><given-names>F</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>Y</given-names> </name></person-group><article-title>Enzyme stability and activity in non-aqueous reaction systems: a mini review</article-title><source>Catalysts</source><year>2016</year><volume>6</volume><issue>2</issue><fpage>32</fpage><pub-id pub-id-type="doi">10.3390/catal6020032</pub-id></nlm-citation></ref><ref id="ref105"><label>105</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tomasi</surname><given-names>J</given-names> </name><name name-style="western"><surname>Mennucci</surname><given-names>B</given-names> </name><name name-style="western"><surname>Cammi</surname><given-names>R</given-names> </name></person-group><article-title>Quantum mechanical continuum solvation models</article-title><source>Chem Rev</source><year>2005</year><month>08</month><volume>105</volume><issue>8</issue><fpage>2999</fpage><lpage>3093</lpage><pub-id pub-id-type="doi">10.1021/cr9904009</pub-id><pub-id pub-id-type="medline">16092826</pub-id></nlm-citation></ref><ref id="ref106"><label>106</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Senhora</surname><given-names>FV</given-names> </name><name name-style="western"><surname>Chi</surname><given-names>H</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Mirabella</surname><given-names>L</given-names> </name><name name-style="western"><surname>Tang</surname><given-names>TL</given-names> </name><name name-style="western"><surname>Paulino</surname><given-names>GH</given-names> </name></person-group><article-title>Machine learning for topology optimization: physics-based learning through an independent training strategy</article-title><source>Comput Methods Appl Mech Eng</source><year>2022</year><month>08</month><volume>398</volume><fpage>115116</fpage><pub-id pub-id-type="doi">10.1016/j.cma.2022.115116</pub-id></nlm-citation></ref><ref id="ref107"><label>107</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tang</surname><given-names>T</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>L</given-names> </name><name name-style="western"><surname>Zhu</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Topology optimization: a review for structural designs under statics problems</article-title><source>Materials (Basel)</source><year>2024</year><month>12</month><day>6</day><volume>17</volume><issue>23</issue><fpage>5970</fpage><pub-id pub-id-type="doi">10.3390/ma17235970</pub-id><pub-id pub-id-type="medline">39685406</pub-id></nlm-citation></ref><ref id="ref108"><label>108</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kazmi</surname><given-names>B</given-names> </name><name name-style="western"><surname>Taqvi</surname><given-names>SA</given-names> 
</name><name name-style="western"><surname>Juchelkov&#x00E1;</surname><given-names>D</given-names> </name><name name-style="western"><surname>Li</surname><given-names>G</given-names> </name><name name-style="western"><surname>Naqvi</surname><given-names>SR</given-names> </name></person-group><article-title>Artificial intelligence-enhanced solubility predictions of greenhouse gases in ionic liquids: a review</article-title><source>Results Eng</source><year>2025</year><month>03</month><volume>25</volume><fpage>103851</fpage><pub-id pub-id-type="doi">10.1016/j.rineng.2024.103851</pub-id></nlm-citation></ref><ref id="ref109"><label>109</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Panapitiya</surname><given-names>G</given-names> </name><name name-style="western"><surname>Girard</surname><given-names>M</given-names> </name><name name-style="western"><surname>Hollas</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Evaluation of deep learning architectures for aqueous solubility prediction</article-title><source>ACS Omega</source><year>2022</year><month>05</month><day>10</day><volume>7</volume><issue>18</issue><fpage>15695</fpage><lpage>15710</lpage><pub-id pub-id-type="doi">10.1021/acsomega.2c00642</pub-id><pub-id pub-id-type="medline">35571767</pub-id></nlm-citation></ref><ref id="ref110"><label>110</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mohanty</surname><given-names>PK</given-names> </name><name name-style="western"><surname>Francis</surname><given-names>SA</given-names> </name><name name-style="western"><surname>Barik</surname><given-names>RK</given-names> </name><name name-style="western"><surname>Roy</surname><given-names>DS</given-names> </name><name name-style="western"><surname>Saikia</surname><given-names>MJ</given-names> </name></person-group><article-title>Leveraging Shapley 
additive explanations for feature selection in ensemble models for diabetes prediction</article-title><source>Bioengineering (Basel)</source><year>2024</year><month>11</month><day>30</day><volume>11</volume><issue>12</issue><fpage>1215</fpage><pub-id pub-id-type="doi">10.3390/bioengineering11121215</pub-id><pub-id pub-id-type="medline">39768033</pub-id></nlm-citation></ref><ref id="ref111"><label>111</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Salgado</surname><given-names>PS</given-names> </name><name name-style="western"><surname>Makeyev</surname><given-names>EV</given-names> </name><name name-style="western"><surname>Butcher</surname><given-names>SJ</given-names> </name><name name-style="western"><surname>Bamford</surname><given-names>DH</given-names> </name><name name-style="western"><surname>Stuart</surname><given-names>DI</given-names> </name><name name-style="western"><surname>Grimes</surname><given-names>JM</given-names> </name></person-group><article-title>The structural basis for RNA specificity and Ca2+ inhibition of an RNA-dependent RNA polymerase</article-title><source>Structure</source><year>2004</year><month>02</month><volume>12</volume><issue>2</issue><fpage>307</fpage><lpage>316</lpage><pub-id pub-id-type="doi">10.1016/j.str.2004.01.012</pub-id><pub-id pub-id-type="medline">14962391</pub-id></nlm-citation></ref><ref id="ref112"><label>112</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gupta</surname><given-names>MN</given-names> </name></person-group><article-title>Enzyme function in organic solvents</article-title><source>Eur J Biochem</source><year>1992</year><month>01</month><day>15</day><volume>203</volume><issue>1-2</issue><fpage>25</fpage><lpage>32</lpage><pub-id pub-id-type="doi">10.1111/j.1432-1033.1992.tb19823.x</pub-id><pub-id 
pub-id-type="medline">1730231</pub-id></nlm-citation></ref><ref id="ref113"><label>113</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zeindlhofer</surname><given-names>V</given-names> </name><name name-style="western"><surname>Schr&#x00F6;der</surname><given-names>C</given-names> </name></person-group><article-title>Computational solvation analysis of biomolecules in aqueous ionic liquid mixtures: from large flexible proteins to small rigid drugs</article-title><source>Biophys Rev</source><year>2018</year><month>06</month><volume>10</volume><issue>3</issue><fpage>825</fpage><lpage>840</lpage><pub-id pub-id-type="doi">10.1007/s12551-018-0416-5</pub-id><pub-id pub-id-type="medline">29687270</pub-id></nlm-citation></ref><ref id="ref114"><label>114</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Warshel</surname><given-names>A</given-names> </name><name name-style="western"><surname>Aqvist</surname><given-names>J</given-names> </name><name name-style="western"><surname>Creighton</surname><given-names>S</given-names> </name></person-group><article-title>Enzymes work by solvation substitution rather than by desolvation</article-title><source>Proc Natl Acad Sci USA</source><year>1989</year><month>08</month><volume>86</volume><issue>15</issue><fpage>5820</fpage><lpage>5824</lpage><pub-id pub-id-type="doi">10.1073/pnas.86.15.5820</pub-id></nlm-citation></ref><ref id="ref115"><label>115</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sethi</surname><given-names>A</given-names> </name><name name-style="western"><surname>Agrawal</surname><given-names>N</given-names> </name><name name-style="western"><surname>Brezovsky</surname><given-names>J</given-names> </name></person-group><article-title>Impact of water models on the structure and dynamics of enzyme 
tunnels</article-title><source>Comput Struct Biotechnol J</source><year>2024</year><month>12</month><volume>23</volume><fpage>3946</fpage><lpage>3954</lpage><pub-id pub-id-type="doi">10.1016/j.csbj.2024.10.051</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Molecular database: molecular database of molecular descriptors representing structural, physicochemical, and quantum properties were calculated using Schr&#x00F6;dinger MAESTRO 12.5 software [<xref ref-type="bibr" rid="ref76">76</xref>]. These included 1D attributes (atom count, molecular weight), 2D features (topological indices, functional groups), 3D characteristics (dipole moment, spatial volume), and 4D properties (HOMO-LUMO energies, electronegativity). A total of 220 descriptors were integrated with experimental inhibition data to enable QSAR modeling.</p><media xlink:href="ai_v4i1e77890_app1.xlsx" xlink:title="XLSX File, 139 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Supplementary information.</p><media xlink:href="ai_v4i1e77890_app2.docx" xlink:title="DOCX File, 20 KB"/></supplementary-material></app-group></back></article>