<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR AI</journal-id><journal-id journal-id-type="publisher-id">ai</journal-id><journal-id journal-id-type="index">41</journal-id><journal-title>JMIR AI</journal-title><abbrev-journal-title>JMIR AI</abbrev-journal-title><issn pub-type="epub">2817-1705</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v4i1e76056</article-id><article-id pub-id-type="doi">10.2196/76056</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Performance of DeepSeek and GPT Models on Pediatric Board Preparation Questions: Comparative Evaluation</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Mansoor</surname><given-names>Masab</given-names></name><degrees>BS, MBA, DBA</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Ibrahim</surname><given-names>Andrew</given-names></name><degrees>BS</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Hamide</surname><given-names>Ali</given-names></name><degrees>BS, MS</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib></contrib-group><aff id="aff1"><institution>Louisiana Campus, Edward Via College of Osteopathic Medicine</institution><addr-line>4408 Bon Aire Dr</addr-line><addr-line>Monroe</addr-line><addr-line>LA</addr-line><country>United States</country></aff><aff id="aff2"><institution>School of Medicine, Texas Tech University Health Sciences Center</institution><addr-line>Lubbock</addr-line><addr-line>TX</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Liu</surname><given-names>Hongfang</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Uchenna</surname><given-names>Akobundu</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Pant</surname><given-names>Dewank</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Potla</surname><given-names>Ravi Teja</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Guo</surname><given-names>Song-Bin</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Sunny</surname><given-names/></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Au</surname><given-names>Chi Lik</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Masab Mansoor, BS, MBA, DBA, Louisiana Campus, Edward Via College of Osteopathic Medicine, 4408 Bon Aire Dr, Monroe, LA, 71203, United States, 1 5045213500; <email>mmansoor@vcom.edu</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date 
pub-type="epub"><day>27</day><month>8</month><year>2025</year></pub-date><volume>4</volume><elocation-id>e76056</elocation-id><history><date date-type="received"><day>15</day><month>04</month><year>2025</year></date><date date-type="rev-recd"><day>14</day><month>05</month><year>2025</year></date><date date-type="accepted"><day>30</day><month>07</month><year>2025</year></date></history><copyright-statement>&#x00A9; Masab Mansoor, Andrew Ibrahim, Ali Hamide. Originally published in JMIR AI (<ext-link ext-link-type="uri" xlink:href="https://ai.jmir.org">https://ai.jmir.org</ext-link>), 27.8.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR AI, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://www.ai.jmir.org/">https://www.ai.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://ai.jmir.org/2025/1/e76056"/><abstract><sec><title>Background</title><p>Limited research exists evaluating artificial intelligence (AI) performance on standardized pediatric assessments. This study evaluated 3 leading AI models on pediatric board preparation questions.</p></sec><sec><title>Objective</title><p>The aim of this study is to evaluate and compare the performance of 3 leading large language models (LLMs) on pediatric board examination preparation questions and contextualize their performance against human physician benchmarks.</p></sec><sec sec-type="methods"><title>Methods</title><p>We analyzed DeepSeek-R1, ChatGPT-4, and ChatGPT-4.5 using 266 multiple-choice questions from the 2023 PREP Self-Assessment. Performance was compared to published American Board of Pediatrics first-time pass rates.</p></sec><sec sec-type="results"><title>Results</title><p>DeepSeek-R1 exhibited the highest accuracy at 98.1% (261/266 correct responses). ChatGPT-4.5 achieved 96.6% accuracy (257/266), performing at the upper threshold of human performance. ChatGPT-4 demonstrated 82.7% accuracy (220/266), comparable to the lower range of human pass rates. 
Error pattern analysis revealed that AI models most commonly struggled with questions requiring integration of complex clinical presentations with rare disease knowledge.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>DeepSeek-R1 demonstrated exceptional performance exceeding typical American Board of Pediatrics pass rates, suggesting potential applications in medical education and clinical support, though further research on complex clinical reasoning is needed.</p></sec></abstract><kwd-group><kwd>artificial intelligence</kwd><kwd>large language models</kwd><kwd>medical education</kwd><kwd>pediatrics</kwd><kwd>board examination</kwd><kwd>DeepSeek</kwd><kwd>ChatGPT</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>The integration of artificial intelligence (AI) in medical education and assessment raises important questions about the capabilities of large language models (LLMs) in understanding and applying pediatric knowledge. Recent advancements in AI have produced models with increasingly sophisticated medical reasoning capabilities [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>], but limited research exists evaluating AI performance on standardized medical assessments. This study evaluates the performance of 3 leading LLMs (DeepSeek-R1 [DeepSeek AI, 2024], ChatGPT-4 [OpenAI, 2023], and ChatGPT-4.5 [OpenAI, 2024]) on a set of 2023 pediatric board examination preparation questions (2023 PREP Self-Assessment, American Academy of Pediatrics), a comprehensive resource containing case-based multiple-choice questions designed to simulate actual board examinations [<xref ref-type="bibr" rid="ref3">3</xref>]. We hypothesized that newer AI models would demonstrate improved accuracy on pediatric knowledge assessment, potentially approaching the performance levels of board-certified pediatricians taking certification examinations.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Overview</title><p>We conducted a comparative analysis of 3 advanced LLMs (DeepSeek-R1, ChatGPT-4, and ChatGPT-4.5) using a set of 266 questions from the American Academy of Pediatrics 2023 PREP Self-Assessment. In compliance with fair use copyright law and with methods deemed exempt by the Healthy Steps Pediatrics Ethics Committee, we entered the 266 questions and answer choices into the 3 LLM platforms. DeepSeek-R1 (DeepSeek AI, 2024), ChatGPT-4 (OpenAI, 2023, gpt-4-turbo, 128k context window), and ChatGPT-4.5 (OpenAI, 2024, gpt-4.5-turbo, 128k context window) were accessed through their respective web interfaces in February 2025.</p><p>The 2023 PREP Self-Assessment was selected as it represents the most comprehensive and current pediatric board preparation resource, designed by the American Academy of Pediatrics to mirror the content, format, and difficulty of actual American Board of Pediatrics (ABP) examinations. The questions cover all major pediatric domains in proportions similar to the ABP content outline. The use of PREP questions was determined to constitute fair use for research purposes under 17 U.S.C. &#x00A7;107, considering (1) noncommercial educational purpose, (2) factual nature of test questions, (3) limited amount used (266 of thousands of available questions), and (4) no market harm to the copyright holder. Questions were entered manually without reproducing answer explanations or proprietary content. 
As a subscription-based resource, the likelihood of PREP questions appearing verbatim in training datasets is low. However, we acknowledge that similar pediatric medical knowledge exists in publicly available resources like medical textbooks and journals that may have been included in model training.</p><p>Each AI model was presented with identical questions in their original multiple-choice format. All questions were text-based without images or clinical photographs. Each model was queried using standardized prompts: &#x201C;Please answer the following multiple-choice question by selecting the best answer: [question text].&#x201D; Default sampling settings were used (temperature=1.0 for the ChatGPT models and the default configuration for DeepSeek-R1). To maintain consistency across models, no chain-of-thought or multistep reasoning prompts were used. All queries were performed once without retries. Questions were presented sequentially without access to previous answers. Responses were collected and evaluated against the established correct answers. Performance was measured by calculating the percentage of correct responses for each model. In addition, 95% confidence intervals were calculated using the Wilson score method. Model performance differences were assessed using the McNemar test for paired comparisons.</p><p>To contextualize these findings, we compared the AI models&#x2019; performance to published data on first-time pass rates for board-certified pediatricians taking the ABP examination. This comparison provides a benchmark for evaluating the clinical relevance of AI performance in pediatric knowledge assessment. It is important to note that the human percentages reported by the ABP represent pass rates&#x2014;the proportion of examinees who achieve or exceed the passing threshold in a given year&#x2014;rather than the raw percentage of questions answered correctly. The ABP does not publicly release its exact passing cutoff, but historical reports and candidate feedback suggest that it corresponds to answering roughly 70% of questions correctly [<xref ref-type="bibr" rid="ref4">4</xref>]. Successful test takers often score well above this minimum, with average performance typically exceeding 80%. Therefore, while AI model performance in this study is expressed as the percentage of correct responses, the human figures used for comparison reflect an outcome-based measure (pass/fail) rather than direct accuracy.</p></sec><sec id="s2-2"><title>Ethical Considerations</title><p>The Healthy Steps Pediatrics Ethics Committee is an institutional committee that evaluates research proposals within our affiliated private practice network. This committee consists of 3 board-certified pediatricians who review research for ethical considerations. The committee determined this study was exempt from formal institutional review board approval as it involved publicly available AI tools and did not include human subjects or protected health information.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><p>The 3 AI models demonstrated marked differences in performance when tested on 266 pediatric board examination preparation questions. DeepSeek-R1 exhibited the highest accuracy at 98.1% (95% CI 95.7%&#x2010;99.4%; 261/266 correct responses), outperforming both ChatGPT models (<xref ref-type="table" rid="table1">Table 1</xref>). 
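</p><p>The interval estimates and the paired model comparison reported in this section can be reproduced from the per-model answer counts; the Wilson method is suitable here because it remains well calibrated for proportions near 1, such as 261/266. The following minimal sketch (in Python, assuming the statsmodels package is available) illustrates the computation. Because per-question agreement between models was not published, the McNemar discordant-pair counts in the sketch are hypothetical placeholders chosen only to be consistent with the reported totals.</p><preformat># Minimal sketch: Wilson 95% CIs and a McNemar paired comparison,
# reconstructed from the per-model counts reported in this study.
from statsmodels.stats.contingency_tables import mcnemar
from statsmodels.stats.proportion import proportion_confint

N = 266
correct = {"DeepSeek-R1": 261, "ChatGPT-4.5": 257, "ChatGPT-4": 220}

for model, k in correct.items():
    lo, hi = proportion_confint(k, N, alpha=0.05, method="wilson")
    print(f"{model}: {k / N:.1%} (95% CI {lo:.1%} to {hi:.1%})")

# McNemar needs the 2x2 agreement table for a model pair:
# [[both correct, only model A correct], [only model B correct, both wrong]].
# These discordant counts are HYPOTHETICAL; only the margins (261/266 and
# 257/266 correct) match the published results.
table = [[255, 6], [2, 3]]  # A = DeepSeek-R1, B = ChatGPT-4.5
result = mcnemar(table, exact=True)
print(f"McNemar exact P={result.pvalue:.2f}")</preformat><p>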
ChatGPT-4 achieved an accuracy of 82.7% (95% CI 77.7%&#x2010;87.0%; 220/266 correct responses), while ChatGPT-4.5 showed improvement over its predecessor, with 96.6% accuracy (95% CI 93.7%&#x2010;98.4%; 257/266 correct responses), missing only 9 questions. The difference between DeepSeek-R1 and ChatGPT-4.5 was not statistically significant (<italic>P</italic>=.38, McNemar test).</p><p>Error pattern analysis revealed that AI models most commonly struggled with questions requiring integration of complex clinical presentations with rare disease knowledge (<xref ref-type="table" rid="table2">Table 2</xref>). For example, DeepSeek&#x2019;s 5 incorrect answers primarily involved metabolic disorders and rare genetic syndromes, particularly questions requiring correlation between subtle biochemical abnormalities and uncommon clinical presentations. ChatGPT models additionally struggled with complex medication dosing calculations and interpretation of pediatric growth parameters in the context of genetic disorders. Notably, there was minimal overlap in the specific questions missed by each model, suggesting that different LLMs have distinct knowledge gaps despite similar training paradigms.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Performance of large language models on 2023 Pediatric Board Examination Preparation Questions.<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup></p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Artificial intelligence model</td><td align="left" valign="bottom">Correct answers</td><td align="left" valign="bottom">Accuracy (%)</td><td align="left" valign="bottom">Comparison to ABP<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup> pass rates<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top">DeepSeek-R1</td><td align="left" valign="top">261</td><td align="left" valign="top">98.1</td><td align="left" valign="top">Exceeds typical ABP pass rate</td></tr><tr><td align="left" valign="top">ChatGPT-4.5</td><td align="left" valign="top">257</td><td align="left" valign="top">96.6</td><td align="left" valign="top">Upper threshold of ABP pass rate</td></tr><tr><td align="left" valign="top">ChatGPT-4</td><td align="left" valign="top">220</td><td align="left" valign="top">82.7</td><td align="left" valign="top">Comparable to lower range of ABP pass rate</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>Each model was tested on 266 multiple-choice questions from the American Academy of Pediatrics 2023 PREP Self-Assessment. Accuracy was calculated as the percentage of correct responses. Performance is contextualized relative to the typical first-time pass rates (80%&#x2010;89%) for board-certified pediatricians on the ABP examination. DeepSeek-R1, ChatGPT-4, and ChatGPT-4.5 were tested on identical questions. 
Pass rates represent historical ABP first-time exam performance.</p></fn><fn id="table1fn2"><p><sup>b</sup>ABP: American Board of Pediatrics.</p></fn><fn id="table1fn3"><p><sup>c</sup>ABP first-time pass rates for board-certified pediatricians typically range from 80% to 89% (80% in 2022 and 89% in 2024 for general pediatrics) [<xref ref-type="bibr" rid="ref5">5</xref>].</p></fn></table-wrap-foot></table-wrap><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Error pattern analysis by knowledge domain.<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Knowledge domain</td><td align="left" valign="bottom">DeepSeek-R1 (N=5), n (%)</td><td align="left" valign="bottom">ChatGPT-4.5 (N=9), n (%)</td><td align="left" valign="bottom">ChatGPT-4 (N=46), n (%)</td></tr></thead><tbody><tr><td align="left" valign="top">Metabolic disorders</td><td align="left" valign="top">3 (60)</td><td align="left" valign="top">4 (44)</td><td align="left" valign="top">15 (33)</td></tr><tr><td align="left" valign="top">Rare genetic syndromes</td><td align="left" valign="top">2 (40)</td><td align="left" valign="top">2 (22)</td><td align="left" valign="top">12 (26)</td></tr><tr><td align="left" valign="top">Medication dosing</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">2 (22)</td><td align="left" valign="top">10 (22)</td></tr><tr><td align="left" valign="top">Growth parameters</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">1 (11)</td><td align="left" valign="top">9 (20)</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>Percentages indicate proportion of total errors for each model.</p></fn></table-wrap-foot></table-wrap><p>These results were compared to the published first-time pass rates for board-certified pediatricians taking the ABP examination, which typically range from 80% to 89% (80% in 2022 and 89% in 2024 for general pediatrics) [<xref ref-type="bibr" rid="ref5">5</xref>]. As illustrated in <xref ref-type="fig" rid="figure1">Figure 1</xref>, DeepSeek&#x2019;s performance exceeded the typical range for human pediatricians on first-attempt board examinations, while ChatGPT-4.5 also performed at the upper threshold of human performance. ChatGPT-4&#x2019;s performance was comparable to the lower range of human pass rates.</p><p>These findings demonstrate substantial variability in AI model performance on pediatric knowledge assessment, with newer models showing strong capabilities on pediatric board questions. The following discussion contextualizes these results within the broader landscape of AI in medical education and clinical practice.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Accuracy of large language models on pediatric board examination preparation questions from the 2023 PREP Self-Assessment. ChatGPT-4, ChatGPT-4.5, and DeepSeek-R1 were each tested on 266 multiple-choice questions. The shaded area represents the typical first-time pass rate range (80%&#x2010;89%) for board-certified pediatricians on the ABP examination from 2022 to 2024. DeepSeek-R1 achieved the highest performance at 98.1%, exceeding the typical ABP pass rate range. 
ABP: American Board of Pediatrics.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v4i1e76056_fig01.png"/></fig></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><p>Our findings demonstrate that recent advancements in LLMs have produced AI systems capable of performing at or above the level of board-certified pediatricians on standardized examination questions. DeepSeek&#x2019;s exceptional performance (98.1% accuracy) represents a significant milestone in AI medical knowledge representation, exceeding typical ABP pass rates. The substantial performance gap between AI models highlights the rapid evolution of these technologies, with newer iterations showing marked improvements over their predecessors [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref6">6</xref>].</p><p>These results have important implications for medical education, board examination preparation, and potentially clinical decision support. AI models could serve as supplementary educational tools for pediatric trainees, offering accurate content knowledge while human educators focus on clinical reasoning, ethics, and patient communication skills that remain challenging for AI systems [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>].</p><p>AI models could revolutionize medical education through personalized learning pathways, instant feedback on clinical reasoning, and simulation of rare cases [<xref ref-type="bibr" rid="ref9">9</xref>]. However, critical limitations remain in areas requiring human judgment, empathy, and ethical decision-making. For instance, while AI excels at factual recall, it cannot replicate the nuanced patient interactions, cultural sensitivity, or ethical reasoning essential to pediatric practice [<xref ref-type="bibr" rid="ref10">10</xref>]. Future applications should focus on AI as a supportive tool that enhances rather than replaces traditional medical education, particularly in areas like case-based learning, differential diagnosis practice, and board examination preparation [<xref ref-type="bibr" rid="ref11">11</xref>].</p><p>Limitations of this study include the use of multiple-choice questions rather than free-response clinical scenarios and the focus on knowledge recall rather than practical clinical decision-making. We cannot determine whether the AI models&#x2019; performance reflects true clinical reasoning or pattern recognition based on similar questions in their training data. Additionally, while PREP Self-Assessment questions are designed to simulate board examinations, they may differ in difficulty and content distribution from actual ABP examinations, complicating direct comparisons with human pass rates. Further caveats apply when comparing AI performance to human ABP pass rates: the ABP examination involves 330 questions administered under timed, proctored conditions with associated stress factors, while our AI evaluation used 266 questions without time constraints or test-taking pressure. Additionally, human physicians integrate years of clinical experience, ethical reasoning, and patient interaction skills that are not assessed in multiple-choice formats. Therefore, while our results demonstrate strong knowledge recall by AI models, they should not be interpreted as evidence of superior clinical competence. 
Furthermore, these models have not been tested on their ability to take a patient history, perform physical examinations, or develop appropriate management plans in real clinical settings [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref13">13</xref>]. Future research should evaluate these AI systems on more complex clinical reasoning tasks and directly compare their performance to practicing pediatricians in simulated clinical scenarios.</p></sec></body><back><fn-group><fn fn-type="con"><p>MM, AI, and AH conceptualized and designed the study, drafted the initial manuscript, designed the data collection instruments, collected data, carried out the initial analyses, and critically reviewed and revised the manuscript. All authors approved the final manuscript as submitted and agree to be accountable for all aspects of the work.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">ABP</term><def><p>American Board of Pediatrics</p></def></def-item><def-item><term id="abb2">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb3">LLM</term><def><p>large language model</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Moor</surname><given-names>M</given-names> </name><name name-style="western"><surname>Banerjee</surname><given-names>O</given-names> </name><name name-style="western"><surname>Abad</surname><given-names>ZSH</given-names> </name><etal/></person-group><article-title>Foundation models for generalist medical artificial intelligence</article-title><source>Nature</source><year>2023</year><month>04</month><volume>616</volume><issue>7956</issue><fpage>259</fpage><lpage>265</lpage><pub-id pub-id-type="doi">10.1038/s41586-023-05881-4</pub-id><pub-id pub-id-type="medline">37045921</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rajpurkar</surname><given-names>P</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>E</given-names> </name><name name-style="western"><surname>Banerjee</surname><given-names>O</given-names> </name><name name-style="western"><surname>Topol</surname><given-names>EJ</given-names> </name></person-group><article-title>AI in health and medicine</article-title><source>Nat Med</source><year>2022</year><month>01</month><volume>28</volume><issue>1</issue><fpage>31</fpage><lpage>38</lpage><pub-id pub-id-type="doi">10.1038/s41591-021-01614-0</pub-id><pub-id pub-id-type="medline">35058619</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="web"><article-title>2023 PREP self-assessment</article-title><source>American Academy of Pediatrics</source><access-date>2025-04-03</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.aap.org/en/catalog/categories/maintenance-of-certification/2023-prep-self-assessment/">https://www.aap.org/en/catalog/categories/maintenance-of-certification/2023-prep-self-assessment/</ext-link></comment></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Le</surname><given-names>M</given-names> </name><name name-style="western"><surname>Davis</surname><given-names>M</given-names> </name></person-group><article-title>ChatGPT yields a passing score on a pediatric board preparatory exam but raises red flags</article-title><source>Glob Pediatr Health</source><year>2024</year><volume>11</volume><fpage>2333794X241240327</fpage><pub-id pub-id-type="doi">10.1177/2333794X241240327</pub-id><pub-id pub-id-type="medline">38529337</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="web"><article-title>Exam pass rates</article-title><source>The American Board of Pediatrics</source><access-date>2025-04-03</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.abp.org/content/exam-pass-rates">https://www.abp.org/content/exam-pass-rates</ext-link></comment></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gritti</surname><given-names>MN</given-names> </name><name name-style="western"><surname>AlTurki</surname><given-names>H</given-names> </name><name name-style="western"><surname>Farid</surname><given-names>P</given-names> </name><name name-style="western"><surname>Morgan</surname><given-names>CT</given-names> </name></person-group><article-title>Progression of an artificial intelligence chatbot (ChatGPT) for pediatric cardiology educational knowledge assessment</article-title><source>Pediatr Cardiol</source><year>2024</year><month>02</month><volume>45</volume><issue>2</issue><fpage>309</fpage><lpage>313</lpage><pub-id pub-id-type="doi">10.1007/s00246-023-03385-6</pub-id><pub-id pub-id-type="medline">38170274</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ramgopal</surname><given-names>S</given-names> </name><name name-style="western"><surname>Sanchez-Pinto</surname><given-names>LN</given-names> </name><name name-style="western"><surname>Horvat</surname><given-names>CM</given-names> </name><name name-style="western"><surname>Carroll</surname><given-names>MS</given-names> </name><name name-style="western"><surname>Luo</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Florin</surname><given-names>TA</given-names> </name></person-group><article-title>Artificial intelligence-based clinical decision support in pediatrics</article-title><source>Pediatr Res</source><year>2023</year><month>01</month><volume>93</volume><issue>2</issue><fpage>334</fpage><lpage>341</lpage><pub-id pub-id-type="doi">10.1038/s41390-022-02226-1</pub-id><pub-id pub-id-type="medline">35906317</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shah</surname><given-names>N</given-names> </name><name name-style="western"><surname>Arshad</surname><given-names>A</given-names> </name><name name-style="western"><surname>Mazer</surname><given-names>MB</given-names> </name><name name-style="western"><surname>Carroll</surname><given-names>CL</given-names> </name><name name-style="western"><surname>Shein</surname><given-names>SL</given-names> </name><name name-style="western"><surname>Remy</surname><given-names>KE</given-names> </name></person-group><article-title>The use of machine learning and artificial intelligence 
within pediatric critical care</article-title><source>Pediatr Res</source><year>2023</year><month>01</month><volume>93</volume><issue>2</issue><fpage>405</fpage><lpage>412</lpage><pub-id pub-id-type="doi">10.1038/s41390-022-02380-6</pub-id><pub-id pub-id-type="medline">36376506</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>W</given-names> </name><name name-style="western"><surname>Cai</surname><given-names>M</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>HJ</given-names> </name><name name-style="western"><surname>Evans</surname><given-names>R</given-names> </name><name name-style="western"><surname>Zhu</surname><given-names>C</given-names> </name><name name-style="western"><surname>Ming</surname><given-names>C</given-names> </name></person-group><article-title>AI in medical education: global situation, effects and challenges</article-title><source>Educ Inf Technol</source><year>2024</year><month>03</month><volume>29</volume><issue>4</issue><fpage>4611</fpage><lpage>4633</lpage><pub-id pub-id-type="doi">10.1007/s10639-023-12009-8</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bhargava</surname><given-names>H</given-names> </name><name name-style="western"><surname>Salomon</surname><given-names>C</given-names> </name><name name-style="western"><surname>Suresh</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Promises, pitfalls, and clinical applications of artificial intelligence in pediatrics</article-title><source>J Med Internet Res</source><year>2024</year><month>02</month><day>29</day><volume>26</volume><fpage>e49022</fpage><pub-id pub-id-type="doi">10.2196/49022</pub-id><pub-id pub-id-type="medline">38421690</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sisk</surname><given-names>BA</given-names> </name><name name-style="western"><surname>Antes</surname><given-names>AL</given-names> </name><name name-style="western"><surname>DuBois</surname><given-names>JM</given-names> </name></person-group><article-title>An overarching framework for the ethics of artificial intelligence in pediatrics</article-title><source>JAMA Pediatr</source><year>2024</year><month>03</month><day>1</day><volume>178</volume><issue>3</issue><fpage>213</fpage><lpage>214</lpage><pub-id pub-id-type="doi">10.1001/jamapediatrics.2023.5761</pub-id><pub-id pub-id-type="medline">38165711</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Booven</surname><given-names>DV</given-names> </name><name name-style="western"><surname>Cheng-Bang</surname><given-names>C</given-names> </name><name name-style="western"><surname>Meenakshy</surname><given-names>M</given-names> </name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Arora</surname><given-names>H</given-names> </name></person-group><article-title>Chapter 8 - limitations of artificial intelligence in healthcare</article-title><source>Artificial Intelligence in Urologic Malignancies</source><year>2025</year><publisher-name>Academic 
Press</publisher-name><fpage>231</fpage><lpage>246</lpage><pub-id pub-id-type="doi">10.1016/B978-0-443-15504-8.00008-9</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zavr&#x0161;nik</surname><given-names>J</given-names> </name><name name-style="western"><surname>Kokol</surname><given-names>P</given-names> </name><name name-style="western"><surname>&#x017D;lahti&#x010D;</surname><given-names>B</given-names> </name><name name-style="western"><surname>Bla&#x017E;un Vo&#x0161;ner</surname><given-names>H</given-names> </name></person-group><article-title>Artificial intelligence and pediatrics: synthetic knowledge synthesis</article-title><source>Electronics (Basel)</source><year>2024</year><volume>13</volume><issue>3</issue><fpage>512</fpage><pub-id pub-id-type="doi">10.3390/electronics13030512</pub-id></nlm-citation></ref></ref-list></back></article>