<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="review-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR AI</journal-id><journal-id journal-id-type="publisher-id">ai</journal-id><journal-id journal-id-type="index">41</journal-id><journal-title>JMIR AI</journal-title><abbrev-journal-title>JMIR AI</abbrev-journal-title><issn pub-type="epub">2817-1705</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v5i1e80928</article-id><article-id pub-id-type="doi">10.2196/80928</article-id><article-categories><subj-group subj-group-type="heading"><subject>Review</subject></subj-group></article-categories><title-group><article-title>AI in Point-of-Care Imaging for Clinical Decision Support: Systematic Review of Diagnostic Accuracy, Task-Shifting, and Explainability</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Wadie</surname><given-names>Peter</given-names></name><degrees>BSc</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Zakher</surname><given-names>Bishoy</given-names></name><degrees>MB BChir</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Elgazzar</surname><given-names>Khalid</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Alsbakhi</surname><given-names>Abdulhamid</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Alhejaily</surname><given-names>Abdul-Mohsen G</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Electrical, Computer, and Software Engineering, Faculty of Engineering and Applied Science, Ontario Tech University</institution><addr-line>2000 Simcoe Street North</addr-line><addr-line>Oshawa</addr-line><addr-line>ON</addr-line><country>Canada</country></aff><aff id="aff2"><institution>Department of Biomedical Engineering, Faculty of Engineering, University of Alberta</institution><addr-line>Edmonton</addr-line><addr-line>AB</addr-line><country>Canada</country></aff><aff id="aff3"><institution>Canadian University Dubai, School of Engineering, Applied Science and Technology</institution><addr-line>Dubai</addr-line><country>United Arab Emirates</country></aff><aff id="aff4"><institution>Riyadh Second Health Cluster</institution><addr-line>Riyadh</addr-line><country>Saudi Arabia</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Raisaro</surname><given-names>Jean-Louis</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Pant</surname><given-names>Dewank</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Grosser</surname><given-names>John</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Banerjee</surname><given-names>Somnath</given-names></name></contrib><contrib contrib-type="reviewer"><name 
name-style="western"><surname>Liang</surname><given-names>Xiaolong</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Peter Wadie, BSc, Department of Electrical, Computer, and Software Engineering, Faculty of Engineering and Applied Science, Ontario Tech University, 2000 Simcoe Street North, Oshawa, ON, L1G 0C5, Canada, 1 9059246707; <email>peter.wadie@ontariotechu.net</email></corresp></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>27</day><month>4</month><year>2026</year></pub-date><volume>5</volume><elocation-id>e80928</elocation-id><history><date date-type="received"><day>18</day><month>07</month><year>2025</year></date><date date-type="rev-recd"><day>12</day><month>01</month><year>2026</year></date><date date-type="accepted"><day>07</day><month>02</month><year>2026</year></date></history><copyright-statement>&#x00A9; Peter Wadie, Bishoy Zakher, Khalid Elgazzar, Abdulhamid Alsbakhi, Abdul-Mohsen G Alhejaily. Originally published in JMIR AI (<ext-link ext-link-type="uri" xlink:href="https://ai.jmir.org">https://ai.jmir.org</ext-link>), 27.4.2026. </copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR AI, is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://www.ai.jmir.org/">https://www.ai.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://ai.jmir.org/2026/1/e80928"/><abstract><sec><title>Background</title><p>Artificial intelligence (AI) integrated with point-of-care imaging is a promising approach to expand access in settings with limited specialist availability. However, no systematic review has comprehensively evaluated AI-assisted clinical decision support across multiple point-of-care imaging modalities, assessed explainability implementation, or quantified clinical impact evidence gaps.</p></sec><sec><title>Objective</title><p>We aim to systematically evaluate and synthesize evidence on AI-based clinical decision support systems using point-of-care imaging.</p></sec><sec sec-type="methods"><title>Methods</title><p>We searched PubMed, Scopus, IEEE Xplore, and Web of Science (January 2018 to November 2025). We included research studies evaluating AI or machine learning systems applied to point-of-care&#x2013;capable imaging modalities in clinical settings with clinical decision support outputs. Two reviewers independently screened studies, extracted data across 18 domains, and assessed methodological quality using QUADAS-2 (Quality Assessment of Diagnostic Accuracy Studies 2). Proposed frameworks were developed to evaluate explainability implementation and clinical impact evidence. Narrative synthesis was performed due to substantial data heterogeneity.</p></sec><sec sec-type="results"><title>Results</title><p>Of 2113 records identified, 20 studies met inclusion criteria, encompassing approximately 78,000 patients across 15 countries. 
Studies evaluated tuberculosis (n=5), breast cancer (n=3), deep vein thrombosis (DVT) (n=2), and 9 other conditions using ultrasound (7/20, 35%), chest x-ray (5/20, 25%), photography-based and colposcopic imaging (3/20, 15%), fundus photography (2/20, 10%), microscopy (2/20, 10%), and dermoscopy (1/20, 5%). Median sensitivity was 93.6% (IQR 87%-98%), and median specificity was 90.6% (IQR 74.5%-96.7%). Task-shifting was demonstrated in 65% (13/20) of studies, with nonspecialists achieving specialist-level performance after a median of 1 hour of training (range 30 minutes to 6 months; n=6 studies reporting specific durations). The explainable artificial intelligence (XAI) implementation cascade revealed critical gaps: 75% (15/20) of studies did not mention explainability, 10% (2/20) provided explanations to users, and none evaluated whether clinicians understood explanations or whether XAI influenced decisions. The clinical impact pyramid showed 15% (3/20) of studies reported technical accuracy only, 65% (13/20) reported process outcomes, 20% (4/20) documented clinical actions, and none measured patient outcomes. Methodological quality was concerning, as 70% (14/20) of studies were at high or very high risk of bias, with verification bias (14/20, 70%) and selection bias (10/20, 50%) being the most common. The overall certainty of evidence was very low&#x2014;GRADE (Grading of Recommendations, Assessment, Development, and Evaluation) &#x2295;&#x25EF;&#x25EF;&#x25EF;, primarily due to risk of bias, heterogeneity, and imprecision.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>AI-assisted point-of-care imaging demonstrates promising diagnostic accuracy and enables meaningful task-shifting with minimal training requirements. However, critical evidence gaps remain, including absent patient outcome measurement, inadequate explainability evaluation, regulatory misalignment, and lack of cross-context validation despite claims of global applicability. 
Addressing these gaps requires implementation research with patient-outcome end points, rigorous XAI evaluation, and multicontext validation before widespread adoption. Limitations include restriction to English-language publications, gray literature exclusion, and heterogeneity precluding meta-analysis.</p></sec></abstract><kwd-group><kwd>systematic reviews as topic</kwd><kwd>machine learning</kwd><kwd>diagnostic imaging</kwd><kwd>explainability</kwd><kwd>task shifting</kwd><kwd>artificial intelligence</kwd><kwd>AI</kwd><kwd>explainable AI</kwd><kwd>XAI</kwd><kwd>clinical decision support systems</kwd><kwd>CDSS</kwd><kwd>point-of-care systems</kwd><kwd>POC</kwd><kwd>mobile phone</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>The intersection of artificial intelligence (AI), point-of-care imaging, and clinical decision support systems (CDSSs) represents a rapidly evolving field with significant potential to transform health care delivery, particularly in settings where specialist access is limited. Point-of-care imaging devices&#x2014;including handheld ultrasound, smartphone-based dermoscopy, portable fundus cameras, and mobile x-ray systems&#x2014;are proliferating worldwide; however, their diagnostic potential is often limited by the scarcity of trained specialists to interpret images. AI-based clinical decision support offers a potential solution by analyzing medical images in real time, providing automated diagnostic classifications, highlighting regions of interest, and generating confidence scores that enable nonspecialist health care workers to obtain accurate diagnostic interpretations at the point of care.</p><p>To contextualize this review within the existing evidence base, we searched for prior systematic reviews addressing similar questions by applying our PubMed search strategy with filters isolating systematic reviews and meta-analyses, yielding 4 relevant publications. 
Kossoff et al [<xref ref-type="bibr" rid="ref1">1</xref>] reviewed automated lung ultrasound analysis for pneumothorax detection, which is limited to a single modality and condition, without requiring point-of-care validation. Cold et al [<xref ref-type="bibr" rid="ref2">2</xref>] examined AI in bronchoscopy, focusing on specialist procedural settings rather than point-of-care contexts. Rambabu et al [<xref ref-type="bibr" rid="ref3">3</xref>] evaluated AI for papilloedema detection using fundus photography, although the studies were predominantly retrospective and lacked clinical outcome assessment. Sunny et al [<xref ref-type="bibr" rid="ref4">4</xref>] reviewed biomarker-integrated cytopathology for oral lesion detection; however, this laboratory-based methodology differs fundamentally from the deployment of point-of-care imaging. Critically, none evaluated task-shifting potential, explainable artificial intelligence (XAI) implementation frameworks, or clinical decision support integration&#x2014;essential considerations for resource-limited settings where such technologies hold the most significant promise.</p><p>The rationale for this systematic review emerges from 3 converging factors. First, regulatory frameworks increasingly mandate explainability for high-risk AI systems. The European Union&#x2019;s AI Act classifies medical diagnostic systems as high-risk applications requiring transparency and interpretability [<xref ref-type="bibr" rid="ref5">5</xref>]. The US Food and Drug Administration&#x2019;s (FDA&#x2019;s) 2021 action plan for AI- or machine learning (ML)&#x2013;based medical devices emphasizes the need for transparency regarding algorithmic decision-making logic [<xref ref-type="bibr" rid="ref6">6</xref>]. Subsequent 2024 guidance explicitly defines &#x201C;explainability&#x201D; as the degree to which AI logic can be understood [<xref ref-type="bibr" rid="ref7">7</xref>]. 
The 21st Century Cures Act specifies that clinical decision support tools must enable health care professionals to independently review the basis for recommendations to qualify for regulatory exemption [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>]. This evolving regulatory landscape creates an urgent need for systematic evidence synthesis regarding whether XAI is being implemented in clinical practice and, critically, whether the explanations implemented are evaluated for their clinical utility. Second, the field is rapidly changing, with most relevant studies published since 2018 reflecting advances in deep learning architectures. Third, there is a critical need to characterize methodological quality, particularly in terms of external validation and clinical outcome assessment beyond diagnostic accuracy.</p><p>This review addresses identified gaps through five key differentiators: (1) multimodality scope that synthesizes evidence across all point-of-care-capable imaging modalities; (2) explicit CDSS framework assessment that examines how AI outputs integrate into clinical workflows; (3) systematic explainability (XAI) evaluation using a proposed cascade framework; (4) strict point-of-care setting requirement that excludes traditional radiology contexts; and (5) rigorous quality assessment using QUADAS-2 (Quality Assessment of Diagnostic Accuracy Studies 2) with attention to AI-specific bias concerns. These methodological features enable a comprehensive evaluation of both diagnostic performance and implementation readiness.</p><p>This systematic review aimed to comprehensively evaluate AI-based CDSSs using point-of-care imaging for diagnostic purposes. 
Using the PCC (Population, Concept, Context) framework, our primary objective was to systematically identify, evaluate, and synthesize evidence on these systems in human participants (population) across point-of-care clinical settings (context), with particular focus on diagnostic accuracy, clinical integration, and real-world implementation. Secondary objectives were to (1) characterize AI architectures, imaging modalities, and clinical domains; (2) assess reported diagnostic performance; (3) evaluate methodological quality using QUADAS-2; (4) examine the extent of XAI implementation and validation; and (5) identify evidence gaps to inform future research, practice, and policy.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Study Design</title><p>This systematic review was conducted following the PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses) 2020 guidelines (<xref ref-type="supplementary-material" rid="app9">Checklist 1</xref>). The review was not prospectively registered due to time constraints. A detailed protocol was developed a priori and is available from the corresponding author upon request.</p></sec><sec id="s2-2"><title>Eligibility Criteria</title><sec id="s2-2-1"><title>Overview</title><p>We used the PCC framework to define eligibility. The population comprised primary research studies (January 2018 to November 2025) evaluating AI or ML systems in human participants. The concept was AI-based clinical decision support using point-of-care imaging for diagnostic, classification, triage, or risk stratification purposes. 
The context included point-of-care clinical settings, such as emergency departments, intensive care units, primary care facilities, prehospital settings, low-resource environments, community health centers, mobile screening units, home care settings, and telemedicine applications.</p></sec><sec id="s2-2-2"><title>Inclusion Criteria Required</title><p>Inclusion criteria required (1) original primary research or peer-reviewed full conference papers (greater than or equal to four pages with medical imaging focus); (2) full text being available in English; (3) publication year 2018&#x2010;2025; (4) evaluation of AI or ML systems (deep learning architectures or traditional ML methods); (5) point-of-care-capable imaging modalities (including ultrasound, dermoscopy, fundus photography, portable x-ray, smartphone-based imaging and portable microscopy); (6) actual deployment or validation in point-of-care settings (not aspirational point-of-care claims); (7) clinical decision support outputs (diagnosis, classification, triage, or screening&#x2014;not purely technical image improvement); and (8) human participants research with clinical validation on patients.</p></sec><sec id="s2-2-3"><title>Exclusion Criteria Applied</title><p>Exclusion criteria applied to (1) systematic reviews, meta-analyses, scoping reviews, literature reviews, editorials, commentaries, letters, book chapters, case reports with fewer than three patients, workshop papers, extended abstracts under four pages, poster abstracts, or preprints without subsequent peer-reviewed publication; (2) non-English language publications; (3) publications from 2017 or earlier; (4) the absence of AI or ML components, including studies with only statistical analysis, threshold-based algorithms, or pure image processing without ML; (5) non&#x2013;point-of-care imaging modalities (computed tomography, magnetic resonance imaging, PET, SPECT, traditional mammography, fixed x-ray, or laboratory-based histopathology); (6) 
studies conducted in non&#x2013;point-of-care settings such as radiology departments, pathology laboratories, or specialist referral centers without point-of-care validation; (7) purely technical outputs including image enhancement, denoising, super-resolution, reconstruction, registration, or segmentation without clinical interpretation; (8) animal-only studies, phantom-only studies, or purely synthetic or simulated data without human validation; and (9) full text being unavailable or inaccessible.</p><p>To address challenges in distinguishing genuine point-of-care implementation from aspirational claims, we applied supplementary screening criteria excluding (1) aspirational point-of-care mentions without actual deployment or validation, (2) public dataset studies without clinical validation, (3) algorithm development without patient-level testing, (4) retrospective analysis of archived images, (5) device development without point-of-care clinical testing, (6) unverified point-of-care workflows, (7) purely technical improvement studies, (8) ambiguous facility types, and (9) retrospective comparisons without prospective validation.</p></sec></sec><sec id="s2-3"><title>Ethical Considerations</title><p>This systematic review analyzed published data without direct human interaction. Therefore, institutional review board approval was not required per standard guidelines.</p></sec><sec id="s2-4"><title>Information Sources</title><p>We searched 4 electronic databases on November 24, 2025: PubMed/MEDLINE (via the National Center for Biotechnology Information interface), Scopus (via the Elsevier platform), IEEE Xplore (via the IEEE platform), and Web of Science Core Collection (via the Clarivate platform). Gray literature, trial registers, and preprint servers were not searched. 
Neither backward citation searching nor forward citation searching was performed.</p></sec><sec id="s2-5"><title>Search Strategy</title><p>Search strategies combined 4 concept blocks using Boolean AND operators: (1) AI or ML technology terms (eg, &#x201C;artificial intelligence,&#x201D; &#x201C;machine learning,&#x201D; &#x201C;deep learning,&#x201D; &#x201C;neural networks,&#x201D; and &#x201C;computer vision&#x201D;); (2) clinical decision support terms (eg, &#x201C;decision support,&#x201D; &#x201C;clinical decision support,&#x201D; &#x201C;CDSS,&#x201D; &#x201C;computer-aided detection,&#x201D; and &#x201C;diagnostic aids&#x201D;); (3) medical imaging modality terms (eg, &#x201C;ultrasound,&#x201D; &#x201C;dermoscopy,&#x201D; &#x201C;fundus imaging,&#x201D; &#x201C;microscopy,&#x201D; &#x201C;x-ray,&#x201D; and &#x201C;clinical photography&#x201D;); and (4) point-of-care context terms (eg, &#x201C;point-of-care,&#x201D; &#x201C;POCUS,&#x201D; &#x201C;bedside,&#x201D; &#x201C;handheld,&#x201D; &#x201C;portable,&#x201D; &#x201C;mobile health,&#x201D; and &#x201C;telemedicine&#x201D;). No language restrictions were applied to the database searches, although only English-language reports were eligible for inclusion. Searches were limited to studies published from January 2018 onward to capture the recent era of deep learning applications in medical imaging. The search strategy was adapted to each database&#x2019;s syntax requirements, with PubMed incorporating MeSH (Medical Subject Headings) terms combined with title or abstract keywords, Scopus using TITLE-ABS-KEY field tags, IEEE Xplore using explicit singular or plural forms, and Web of Science using topic search field tags. Complete search strategies for all databases, including exact queries and filters, are provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p></sec><sec id="s2-6"><title>Study Selection</title><p>Screening and selection followed a 2-stage process. 
Search results were imported into Zotero (Corporation for Digital Scholarship) for deduplication and reference management. A total of 2 reviewers independently screened titles and abstracts using Rayyan (Rayyan Systems Inc) software with anonymous dual review. Studies meeting the inclusion criteria or with uncertain eligibility proceeded to full-text review. A total of 2 reviewers independently assessed full texts, with disagreements resolved through discussion. No automation tools were used to support title or abstract screening or full-text assessment; human reviewers made all screening decisions. When multiple exclusion criteria were applied to a single study, the primary reason for exclusion was recorded following a prespecified hierarchy: first, study type, language, date, and availability issues; then, population characteristics; and finally, intervention criteria.</p><p>A representative sample of excluded studies, along with the reasons for exclusion, is provided in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p></sec><sec id="s2-7"><title>Data Collection Process</title><p>A total of 2 reviewers independently extracted data from each included study using a standardized form encompassing 18 prespecified sections: (1) study registry, (2) study design, (3) population, (4) point-of-care setting, (5) operator characteristics, (6) AI system, (7) AI output, (8) imaging, (9) reference standard, (10) diagnostic performance, (11) comparator, (12) explainability, (13) clinical outcomes, (14) limitations, (15) task-shifting, (16) integration, (17) QUADAS-2 summary, and (18) derived metrics (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>). Disagreements were resolved through discussion. No automation tools were used in the data extraction process. Study authors were not contacted to obtain or confirm missing or unclear data. 
Unreported items were coded as &#x201C;not reported,&#x201D; and strict anti-inference rules prohibited any imputation or assumption of unreported data.</p></sec><sec id="s2-8"><title>Data Items</title><p>Data extracted from the 18 prespecified sections of the standardized form were organized for synthesis into thematic domains encompassing study characteristics, point-of-care context, AI system features, diagnostic performance, and clinical impact.</p></sec><sec id="s2-9"><title>Study, Clinical, and AI System Characteristics</title><p>Extracted variables encompassed study characteristics (author, year, country, setting, design, sample size, condition, and prevalence), point-of-care context (facility type, geographic setting, and World Bank income classification), operator characteristics (profession, experience, and training), and AI system features (architecture, regulatory status, modality, device type, and processing capability).</p></sec><sec id="s2-10"><title>Diagnostic Performance Outcomes</title><p>Primary outcomes were sensitivity and specificity; secondary outcomes included area under the receiver operating characteristic curve (AUC), positive predictive value (PPV), and negative predictive value (NPV), with 95% CIs where reported. For studies reporting multiple thresholds, we extracted values at the author-identified optimal threshold. 
No amendments were made to outcome definitions from the a priori protocol.</p></sec><sec id="s2-11"><title>Additional Outcomes and Contextual Variables</title><p>Additional items included comparator characteristics, explainability assessment (technique, user presentation, understanding, and decision impact), clinical outcomes (workflow, referrals, time, cost, and patient outcomes), task-shifting demonstrations, and training requirements.</p></sec><sec id="s2-12"><title>Study Risk of Bias Assessment</title><p>Methodological quality was assessed using QUADAS-2 across 4 domains: patient selection, index test, reference standard, and flow and timing. Overall risk of bias (RoB) was synthesized using predefined criteria: low (all domains low), low-moderate (1 unclear), moderate (1 high or multiple unclear), high (2 or more high), or very high (3 or more high or critical flaws). A total of 2 reviewers independently assessed each study, with disagreements resolved through discussion. Study authors were not contacted; no automation tools were used.</p></sec><sec id="s2-13"><title>Synthesis Methods</title><sec id="s2-13-1"><title>Data Preparation and Eligibility for Synthesis</title><p>All 20 studies were eligible for narrative synthesis; meta-analysis was precluded by substantial clinical heterogeneity (12 conditions, 6 modalities, and diverse settings and operators) and methodological heterogeneity (varying designs, reference standards, and thresholds). Only explicitly reported values were extracted without imputation or transformation. 
Studies with multiple subgroups or time points were synthesized using author-identified primary values.</p></sec><sec id="s2-13-2"><title>Synthesis Approach</title><p>Data were synthesized narratively, organized around themes aligned with the review&#x2019;s secondary objectives: (1) AI system and imaging characteristics, (2) diagnostic performance, (3) task-shifting demonstrations and training requirements, (4) explainability implementation, (5) clinical outcomes beyond diagnostic accuracy, and (6) methodological quality. Studies were grouped for synthesis based on prespecified characteristics extracted during data collection: clinical condition, imaging modality, resource context (World Bank income classification), operator type (specialist vs nonspecialist), and methodological quality strata (QUADAS-2 overall RoB). The decision process for creating synthesis groupings was determined a priori through the standardized data extraction domains, with studies categorized according to their extracted characteristics. Formal statistical tests for heterogeneity were not applicable given the narrative synthesis approach and absence of meta-analysis.</p></sec><sec id="s2-13-3"><title>Subgroup Analyses</title><p>Prespecified subgroup analyses examined patterns across clinical conditions, imaging modalities, resource contexts (high-income country [HIC] vs lower-middle-income country [LMIC] or low-income country [LIC]), operator types, and methodological quality strata. Task-shifting and training requirements were characterized through tabulation. Explainability and clinical impact were categorized using the proposed XAI cascade and clinical impact pyramid frameworks.</p></sec><sec id="s2-13-4"><title>Sensitivity Analyses</title><p>Sensitivity analysis assessed the robustness of diagnostic performance findings by restricting the analysis to studies with a low or low-moderate overall RoB, as assessed by the QUADAS-2. 
This quality-restricted analysis compared performance ranges (minimum-maximum) and central tendency measures (median) between the complete evidence base and the quality-restricted subset to evaluate the influence of methodological quality on reported diagnostic accuracy estimates. The results were compared descriptively without formal statistical testing.</p></sec></sec><sec id="s2-14"><title>Data Presentation Methods</title><p>The results are presented in descriptive tables and figures including a PRISMA flow diagram, geographic distribution map, task-shifting illustration, forest plots grouped by condition, XAI cascade and clinical impact pyramid distributions, and QUADAS-2 visualizations.</p></sec><sec id="s2-15"><title>Proposed Synthesis Frameworks</title><p>A total of 2 frameworks were developed to systematically characterize evidence gaps not captured by standard diagnostic accuracy reporting. The first framework, an XAI implementation cascade (levels 0&#x2010;5), categorizes the depth of explainability implementation from nonmention through clinical user exposure to evaluation of whether explanations influenced clinical decisions. The second framework, a clinical impact pyramid (levels 0&#x2010;5), categorizes the maturity of clinical evidence from technical validation toward demonstration of health benefit. Diagnostic accuracy (level 0) is positioned at the pyramid&#x2019;s base because, while foundational to any AI diagnostic system, it represents the least mature form of clinical evidence&#x2014;demonstrating that a system works technically without establishing whether it improves patient outcomes. Process outcomes such as workflow efficiency and time to diagnosis occupy level 1. Clinical actions documented based on AI recommendations constitute level 2. Patient outcomes measured represent level 3, health system impact represents level 4, and population health outcomes represent level 5. 
These frameworks enabled systematic identification of evidence gaps and regulatory alignment assessment.</p></sec><sec id="s2-16"><title>Reporting Bias Assessment</title><p>Reporting bias was assessed qualitatively, given the narrative synthesis approach and substantial heterogeneity that precluded meta-analysis. The assessment examined (1) literature search comprehensiveness, including gray literature coverage; (2) patterns potentially indicating publication bias, including commercial system predominance and small-study effects; and (3) selective outcome reporting completeness for diagnostic accuracy metrics (sensitivity, specificity, AUC, PPV, and NPV) and precision estimates (95% CIs). Formal statistical methods for detecting publication bias (eg, Deeks funnel plot asymmetry testing) were not feasible; however, a qualitative assessment was conducted to characterize potential biases that may have affected the evidence base. A total of 2 reviewers independently assessed the reporting bias, with disagreements resolved through discussion.</p></sec><sec id="s2-17"><title>Certainty of Evidence Assessment</title><p>The certainty of evidence was assessed using the GRADE (Grading of Recommendations, Assessment, Development, and Evaluation) framework, adapted for diagnostic test accuracy studies. Following GRADE guidance, diagnostic accuracy studies are initiated at a high level of certainty. They are rated down across 5 domains: RoB (informed by QUADAS-2 assessments), indirectness (applicability and heterogeneity), inconsistency (variation in diagnostic performance), imprecision (CI width and sample size), and publication bias (completeness of literature search and selective reporting). Each domain was rated as no serious concern (no downgrade), serious concern (downgrade by 1 level), or grave concern (downgrade by 2 levels). 
Overall certainty was determined by summing downgrades across domains, with final ratings of high (&#x2295;&#x2295;&#x2295;&#x2295;), moderate (&#x2295;&#x2295;&#x2295;&#x25EF;), low (&#x2295;&#x2295;&#x25EF;&#x25EF;), or very low (&#x2295;&#x25EF;&#x25EF;&#x25EF;). A total of 2 reviewers independently assessed the certainty of evidence for each outcome, with disagreements resolved through discussion.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Study Selection</title><p>The systematic search yielded 2113 records across 4 databases: Scopus (n=993), Web of Science (n=422), IEEE Xplore (n=408), and PubMed (n=290). After removing 707 duplicates, 1406 unique records underwent title and abstract screening.</p><p>Title and abstract screening excluded 1245 records. The most frequent exclusion reasons were wrong study type (n=319, including reviews and editorials), wrong imaging modality (n=224, such as computed tomography or magnetic resonance imaging), lack of clinical validation (n=181), absence of point-of-care setting validation (n=153), and publication before 2018 (n=144). Additional exclusions included studies without an AI or ML component (n=70), retrospective designs (n=59), device development studies only (n=40), studies without clinical decision support output (n=37), nonhuman studies (n=14), and studies with missing abstracts (n=4).</p><p>All 161 records identified for full-text review were successfully retrieved; no potentially eligible reports were unretrievable. Full-text assessment excluded 141 studies, primarily due to the lack of point-of-care setting validation (n=40), device development without clinical implementation (n=35), and retrospective designs (n=33). 
Other exclusions included no clinical decision support output (n=7), unavailable full text (n=7), lack of clinical validation (n=7), wrong imaging modality (n=5), wrong study type (n=3), pre-2018 publication (n=2), nonhuman participants (n=1), and no AI or ML component (n=1). Twenty studies met all inclusion criteria and were included in the final synthesis (<xref ref-type="fig" rid="figure1">Figure 1</xref>). Human reviewers made all screening and selection decisions without the use of automation tools.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>PRISMA 2020 flow diagram showing study selection process. AI: artificial intelligence; CDS: clinical decision support; ML: machine learning; PRISMA: Preferred Reporting Items for Systematic Reviews and Meta-Analyses.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v5i1e80928_fig01.png"/></fig><p>A representative sample of 30 borderline cases excluded at full-text review is provided in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>. These studies met many criteria but failed to meet specific requirements, most commonly due to a retrospective design with AI applied to historical data (10/30, 33.3%) and the development of algorithms without subsequent clinical implementation (8/30, 26.7%). Other reasons included aspirational point-of-care claims without demonstrated deployment (5/30, 16.7%), specialist settings (3/30, 10%), and validation at a tertiary center only (2/30, 6.7%).</p></sec><sec id="s3-2"><title>Study Characteristics</title><p>The 20 included studies enrolled approximately 78,000 patients across 15 countries (<xref ref-type="table" rid="table1">Table 1</xref>). 
The temporal distribution demonstrated an accelerating research trajectory, with 85% (17/20) of studies published in 2023 or later.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Characteristics of included studies.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Author</td><td align="left" valign="bottom">Country</td><td align="left" valign="bottom">Setting</td><td align="left" valign="bottom">Design</td><td align="left" valign="bottom">N</td><td align="left" valign="bottom">Condition</td><td align="left" valign="bottom">Prevalence (%)</td></tr></thead><tbody><tr><td align="left" valign="top">Avgerinos et al [<xref ref-type="bibr" rid="ref10">10</xref>]</td><td align="left" valign="top">Greece</td><td align="left" valign="top">Emergency department</td><td align="left" valign="top">NR<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup></td><td align="left" valign="top">53</td><td align="left" valign="top">DVT<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup></td><td align="left" valign="top">11.3</td></tr><tr><td align="left" valign="top">Berg et al [<xref ref-type="bibr" rid="ref11">11</xref>]</td><td align="left" valign="top">Mexico</td><td align="left" valign="top">Hospital</td><td align="left" valign="top">Diagnostic accuracy</td><td align="left" valign="top">758</td><td align="left" valign="top">Breast cancer</td><td align="left" valign="top">7.4</td></tr><tr><td align="left" valign="top">Cao et al [<xref ref-type="bibr" rid="ref12">12</xref>]</td><td align="left" valign="top">China</td><td align="left" valign="top">Township health centers</td><td align="left" valign="top">Prospective cohort</td><td align="left" valign="top">3705</td><td align="left" valign="top">Tuberculosis</td><td align="left" valign="top">2.1</td></tr><tr><td align="left" valign="top">Chen et al [<xref ref-type="bibr" rid="ref13">13</xref>]</td><td align="left" 
valign="top">China</td><td align="left" valign="top">ED<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup>; primary care</td><td align="left" valign="top">Cross-sectional</td><td align="left" valign="top">364</td><td align="left" valign="top">Ophthalmic emergencies</td><td align="left" valign="top">19.1 urgent</td></tr><tr><td align="left" valign="top">Fergus et al [<xref ref-type="bibr" rid="ref14">14</xref>]</td><td align="left" valign="top">UK</td><td align="left" valign="top">Home care</td><td align="left" valign="top">Clinical trial</td><td align="left" valign="top">216</td><td align="left" valign="top">Pressure ulcers</td><td align="left" valign="top">100</td></tr><tr><td align="left" valign="top">Heydon et al [<xref ref-type="bibr" rid="ref15">15</xref>]</td><td align="left" valign="top">UK</td><td align="left" valign="top">Screening program</td><td align="left" valign="top">Diagnostic accuracy</td><td align="left" valign="top">30,405</td><td align="left" valign="top">Diabetic retinopathy</td><td align="left" valign="top">7.3</td></tr><tr><td align="left" valign="top">Iacob et al [<xref ref-type="bibr" rid="ref16">16</xref>]</td><td align="left" valign="top">Romania</td><td align="left" valign="top">Primary care</td><td align="left" valign="top">Diagnostic accuracy</td><td align="left" valign="top">1780</td><td align="left" valign="top">Cardiac pathology</td><td align="left" valign="top">32.9</td></tr><tr><td align="left" valign="top">Jaremko et al [<xref ref-type="bibr" rid="ref17">17</xref>]</td><td align="left" valign="top">Canada</td><td align="left" valign="top">Primary care</td><td align="left" valign="top">Implementation</td><td align="left" valign="top">306</td><td align="left" valign="top">Hip dysplasia</td><td align="left" valign="top">2</td></tr><tr><td align="left" valign="top">Jayaraman et al [<xref ref-type="bibr" rid="ref18">18</xref>]</td><td align="left" valign="top">India</td><td align="left" valign="top">Mobile units</td><td 
align="left" valign="top">NR</td><td align="left" valign="top">25,598</td><td align="left" valign="top">Tuberculosis</td><td align="left" valign="top">4</td></tr><tr><td align="left" valign="top">Kazemzadeh et al [<xref ref-type="bibr" rid="ref19">19</xref>]</td><td align="left" valign="top">Zambia</td><td align="left" valign="top">Health facilities</td><td align="left" valign="top">Prospective observational</td><td align="left" valign="top">1827</td><td align="left" valign="top">Tuberculosis</td><td align="left" valign="top">10.5</td></tr><tr><td align="left" valign="top">Love et al [<xref ref-type="bibr" rid="ref20">20</xref>]</td><td align="left" valign="top">Mexico</td><td align="left" valign="top">Government hospital</td><td align="left" valign="top">Pilot study</td><td align="left" valign="top">32</td><td align="left" valign="top">Breast masses</td><td align="left" valign="top">6</td></tr><tr><td align="left" valign="top">Malherbe [<xref ref-type="bibr" rid="ref21">21</xref>]</td><td align="left" valign="top">South Africa</td><td align="left" valign="top">Primary care</td><td align="left" valign="top">Prospective cohort</td><td align="left" valign="top">203</td><td align="left" valign="top">Breast cancer</td><td align="left" valign="top">NR</td></tr><tr><td align="left" valign="top">Marquez et al [<xref ref-type="bibr" rid="ref22">22</xref>]</td><td align="left" valign="top">Philippines</td><td align="left" valign="top">Mixed</td><td align="left" valign="top">Retrospective</td><td align="left" valign="top">5740</td><td align="left" valign="top">Tuberculosis</td><td align="left" valign="top">13.1</td></tr><tr><td align="left" valign="top">Nath et al [<xref ref-type="bibr" rid="ref23">23</xref>]</td><td align="left" valign="top">India</td><td align="left" valign="top">PHC<sup><xref ref-type="table-fn" rid="table1fn4">d</xref></sup>+ tertiary centers</td><td align="left" valign="top">Diagnostic accuracy</td><td align="left" valign="top">4363</td><td align="left" 
valign="top">Tuberculosis</td><td align="left" valign="top">53.7</td></tr><tr><td align="left" valign="top">Nothnagel and Aslam [<xref ref-type="bibr" rid="ref24">24</xref>]</td><td align="left" valign="top">Germany</td><td align="left" valign="top">Hospital</td><td align="left" valign="top">Feasibility study</td><td align="left" valign="top">58</td><td align="left" valign="top">DVT</td><td align="left" valign="top">9</td></tr><tr><td align="left" valign="top">Papachristou et al [<xref ref-type="bibr" rid="ref25">25</xref>]</td><td align="left" valign="top">Sweden</td><td align="left" valign="top">Primary care</td><td align="left" valign="top">Prospective trial</td><td align="left" valign="top">253</td><td align="left" valign="top">Melanoma</td><td align="left" valign="top">8.3</td></tr><tr><td align="left" valign="top">Poli et al [<xref ref-type="bibr" rid="ref26">26</xref>]</td><td align="left" valign="top">India</td><td align="left" valign="top">Community screening</td><td align="left" valign="top">Exploratory intervention</td><td align="left" valign="top">2052</td><td align="left" valign="top">Cervical cancer</td><td align="left" valign="top">4.3</td></tr><tr><td align="left" valign="top">Yang et al [<xref ref-type="bibr" rid="ref27">27</xref>]</td><td align="left" valign="top">Madagascar</td><td align="left" valign="top">Rural villages</td><td align="left" valign="top">Diagnostic accuracy</td><td align="left" valign="top">113</td><td align="left" valign="top">Helminthiases</td><td align="left" valign="top">70</td></tr><tr><td align="left" valign="top">Yu et al [<xref ref-type="bibr" rid="ref28">28</xref>]</td><td align="left" valign="top">Sudan</td><td align="left" valign="top">Primary hospitals</td><td align="left" valign="top">Diagnostic accuracy</td><td align="left" valign="top">85&#x2010;189</td><td align="left" valign="top">Malaria</td><td align="left" valign="top">52.6</td></tr><tr><td align="left" valign="top">Zhu et al [<xref ref-type="bibr" 
rid="ref29">29</xref>]</td><td align="left" valign="top">United States</td><td align="left" valign="top">Community screening</td><td align="left" valign="top">NR</td><td align="left" valign="top">385</td><td align="left" valign="top">Vision-threatening diseases</td><td align="left" valign="top">20.5</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>NR: not reported.</p></fn><fn id="table1fn2"><p><sup>b</sup>DVT: deep vein thrombosis.</p></fn><fn id="table1fn3"><p><sup>c</sup>ED: emergency department.</p></fn><fn id="table1fn4"><p><sup>d</sup>PHC: primary health center.</p></fn></table-wrap-foot></table-wrap><p>The geographic distribution spanned 4 continents, with Asia and Europe each contributing 30% (6/20) of the studies, followed by Africa and North America, each at 20% (4/20; <xref ref-type="fig" rid="figure2">Figure 2</xref>). India 15% (3/20), China 10% (2/20), Mexico 10% (2/20), and the United Kingdom 10% (2/20) contributed multiple studies; single studies originated from 11 additional countries. Notably, no studies were conducted in South America or Oceania.</p><p>World Bank income classification (<xref ref-type="table" rid="table2">Table 2</xref>) revealed substantial research activity in resource-limited settings, with 60% (12/20) of studies conducted outside HICs, including 2 in LICs (Sudan and Madagascar).</p><p>Sample sizes ranged from 32 to 30,405 participants (median 375, IQR 158-2879). The most extensive studies by sample size were Heydon et al [<xref ref-type="bibr" rid="ref15">15</xref>] (n=30,405) evaluating diabetic retinopathy in the United Kingdom screening program, Jayaraman et al [<xref ref-type="bibr" rid="ref18">18</xref>] (n=25,598) evaluating tuberculosis in Indian mobile units, and Marquez et al [<xref ref-type="bibr" rid="ref22">22</xref>] (n=5740) evaluating tuberculosis in the Philippines. 
The clinical domains were diverse, including tuberculosis (5/20, 25%), breast cancer (3/20, 15%), deep vein thrombosis (DVT) (2/20, 10%), and diabetic retinopathy (2/20, 10%). Single studies addressed cardiac pathology, cervical cancer, developmental hip dysplasia, malaria, ophthalmic emergencies, parasitic infections, pressure ulcers, and melanoma. Disease prevalence ranged from 2% (hip dysplasia) to 70% (helminthiases).</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Illustrating the geographic distribution of included studies.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v5i1e80928_fig02.png"/></fig><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Point-of-care setting and operator characteristics.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Author</td><td align="left" valign="bottom">Facility type</td><td align="left" valign="bottom">Geographic</td><td align="left" valign="bottom">Resource context</td><td align="left" valign="bottom">Operator</td><td align="left" valign="bottom">Prior experience</td><td align="left" valign="bottom">Training duration</td></tr></thead><tbody><tr><td align="left" valign="top">Avgerinos et al [<xref ref-type="bibr" rid="ref10">10</xref>]</td><td align="left" valign="top">ED<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></td><td align="left" valign="top">NR<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup></td><td align="left" valign="top">HIC<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup></td><td align="left" valign="top">Clinical researcher + vascular residents</td><td align="left" valign="top">None</td><td align="left" valign="top">1 hour</td></tr><tr><td align="left" valign="top">Berg et al [<xref ref-type="bibr" rid="ref11">11</xref>]</td><td align="left" valign="top">Hospital</td><td align="left" 
valign="top">NR</td><td align="left" valign="top">UMIC<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></td><td align="left" valign="top">Radiologist + research coordinators</td><td align="left" valign="top">None</td><td align="left" valign="top">30 min</td></tr><tr><td align="left" valign="top">Cao et al [<xref ref-type="bibr" rid="ref12">12</xref>]</td><td align="left" valign="top">Township PHC<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td><td align="left" valign="top">Rural</td><td align="left" valign="top">UMIC</td><td align="left" valign="top">Local radiologists</td><td align="left" valign="top">NR</td><td align="left" valign="top">NR</td></tr><tr><td align="left" valign="top">Chen et al [<xref ref-type="bibr" rid="ref13">13</xref>]</td><td align="left" valign="top">ED; PHC</td><td align="left" valign="top">NR</td><td align="left" valign="top">UMIC</td><td align="left" valign="top">Patients; staff</td><td align="left" valign="top">Mixed</td><td align="left" valign="top">NR</td></tr><tr><td align="left" valign="top">Fergus et al [<xref ref-type="bibr" rid="ref14">14</xref>]</td><td align="left" valign="top">Home care</td><td align="left" valign="top">NR</td><td align="left" valign="top">HIC</td><td align="left" valign="top">District nurses</td><td align="left" valign="top">Varied</td><td align="left" valign="top">NR</td></tr><tr><td align="left" valign="top">Heydon et al [<xref ref-type="bibr" rid="ref15">15</xref>]</td><td align="left" valign="top">Screening</td><td align="left" valign="top">NR</td><td align="left" valign="top">HIC</td><td align="left" valign="top">Trained graders</td><td align="left" valign="top">Trained</td><td align="left" valign="top">NR</td></tr><tr><td align="left" valign="top">Iacob et al [<xref ref-type="bibr" rid="ref16">16</xref>]</td><td align="left" valign="top">PHC</td><td align="left" valign="top">NR</td><td align="left" valign="top">HIC</td><td align="left" valign="top">Family physicians</td><td 
align="left" valign="top">Nonspecialist</td><td align="left" valign="top">6 months</td></tr><tr><td align="left" valign="top">Jaremko et al [<xref ref-type="bibr" rid="ref17">17</xref>]</td><td align="left" valign="top">PHC</td><td align="left" valign="top">Towns</td><td align="left" valign="top">HIC</td><td align="left" valign="top">Nurses, LPNs<sup><xref ref-type="table-fn" rid="table2fn6">f</xref></sup>, FPs<sup><xref ref-type="table-fn" rid="table2fn7">g</xref></sup></td><td align="left" valign="top">Nonspecialist</td><td align="left" valign="top">2&#x2010;4 d</td></tr><tr><td align="left" valign="top">Jayaraman et al [<xref ref-type="bibr" rid="ref18">18</xref>]</td><td align="left" valign="top">Mobile vans</td><td align="left" valign="top">Rural</td><td align="left" valign="top">LMIC<sup><xref ref-type="table-fn" rid="table2fn8">h</xref></sup></td><td align="left" valign="top">Radiologist</td><td align="left" valign="top">15 years</td><td align="left" valign="top">NR</td></tr><tr><td align="left" valign="top">Kazemzadeh et al [<xref ref-type="bibr" rid="ref19">19</xref>]</td><td align="left" valign="top">Health facilities</td><td align="left" valign="top">NR</td><td align="left" valign="top">LMIC</td><td align="left" valign="top">NR</td><td align="left" valign="top">NR</td><td align="left" valign="top">NR</td></tr><tr><td align="left" valign="top">Love et al [<xref ref-type="bibr" rid="ref20">20</xref>]</td><td align="left" valign="top">Hospital</td><td align="left" valign="top">NR</td><td align="left" valign="top">UMIC</td><td align="left" valign="top">Medical student, nurse</td><td align="left" valign="top">None</td><td align="left" valign="top">30 min</td></tr><tr><td align="left" valign="top">Malherbe [<xref ref-type="bibr" rid="ref21">21</xref>]</td><td align="left" valign="top">PHC</td><td align="left" valign="top">Urban</td><td align="left" valign="top">UMIC</td><td align="left" valign="top">GP<sup><xref ref-type="table-fn" 
rid="table2fn9">i</xref></sup> with ultrasound experience</td><td align="left" valign="top">Experienced</td><td align="left" valign="top">NR</td></tr><tr><td align="left" valign="top">Marquez et al [<xref ref-type="bibr" rid="ref22">22</xref>]</td><td align="left" valign="top">Mixed</td><td align="left" valign="top">NR</td><td align="left" valign="top">LMIC</td><td align="left" valign="top">Health workers</td><td align="left" valign="top">NR</td><td align="left" valign="top">NR</td></tr><tr><td align="left" valign="top">Nath et al [<xref ref-type="bibr" rid="ref23">23</xref>]</td><td align="left" valign="top">PHC + TC<sup><xref ref-type="table-fn" rid="table2fn10">j</xref></sup></td><td align="left" valign="top">NR</td><td align="left" valign="top">LMIC</td><td align="left" valign="top">X-ray technicians</td><td align="left" valign="top">NR</td><td align="left" valign="top">Simple process</td></tr><tr><td align="left" valign="top">Nothnagel and Aslam [<xref ref-type="bibr" rid="ref24">24</xref>]</td><td align="left" valign="top">Hospital</td><td align="left" valign="top">NR</td><td align="left" valign="top">HIC</td><td align="left" valign="top">Nonspecialists</td><td align="left" valign="top">None</td><td align="left" valign="top">1 hour</td></tr><tr><td align="left" valign="top">Papachristou et al [<xref ref-type="bibr" rid="ref25">25</xref>]</td><td align="left" valign="top">PHC</td><td align="left" valign="top">NR</td><td align="left" valign="top">HIC</td><td align="left" valign="top">GPs, residents</td><td align="left" valign="top">Varied</td><td align="left" valign="top">On-site</td></tr><tr><td align="left" valign="top">Poli et al [<xref ref-type="bibr" rid="ref26">26</xref>]</td><td align="left" valign="top">Community</td><td align="left" valign="top">Rural</td><td align="left" valign="top">LMIC</td><td align="left" valign="top">Nurses</td><td align="left" valign="top">3&#x2010;12 years VIA<sup><xref ref-type="table-fn" 
rid="table2fn11">k</xref></sup></td><td align="left" valign="top">NR</td></tr><tr><td align="left" valign="top">Yang et al [<xref ref-type="bibr" rid="ref27">27</xref>]</td><td align="left" valign="top">Rural villages</td><td align="left" valign="top">Rural</td><td align="left" valign="top">LIC<sup><xref ref-type="table-fn" rid="table2fn12">l</xref></sup></td><td align="left" valign="top">Local HCWs<sup><xref ref-type="table-fn" rid="table2fn13">m</xref></sup></td><td align="left" valign="top">NR</td><td align="left" valign="top">NR</td></tr><tr><td align="left" valign="top">Yu et al [<xref ref-type="bibr" rid="ref28">28</xref>]</td><td align="left" valign="top">Primary hospitals</td><td align="left" valign="top">Rural</td><td align="left" valign="top">LIC</td><td align="left" valign="top">Microscopists</td><td align="left" valign="top">NR</td><td align="left" valign="top">Online session</td></tr><tr><td align="left" valign="top">Zhu et al [<xref ref-type="bibr" rid="ref29">29</xref>]</td><td align="left" valign="top">Community</td><td align="left" valign="top">NR</td><td align="left" valign="top">HIC</td><td align="left" valign="top">Medical students</td><td align="left" valign="top">None</td><td align="left" valign="top">Equipment training</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>ED: emergency department.</p></fn><fn id="table2fn2"><p><sup>b</sup>NR: not reported.</p></fn><fn id="table2fn3"><p><sup>c</sup>HIC: high-income country.</p></fn><fn id="table2fn4"><p><sup>d</sup>UMIC: upper-middle-income country.</p></fn><fn id="table2fn5"><p><sup>e</sup>PHC: primary health center.</p></fn><fn id="table2fn6"><p><sup>f</sup>LPN: licensed practical nurse.</p></fn><fn id="table2fn7"><p><sup>g</sup>FP: family physician.</p></fn><fn id="table2fn8"><p><sup>h</sup>LMIC: lower-middle-income country.</p></fn><fn id="table2fn9"><p><sup>i</sup>GP: general practitioner.</p></fn><fn id="table2fn10"><p><sup>j</sup>TC: tertiary center.</p></fn><fn 
id="table2fn11"><p><sup>k</sup>VIA: visual inspection with acetic acid.</p></fn><fn id="table2fn12"><p><sup>l</sup>LIC: low-income country.</p></fn><fn id="table2fn13"><p><sup>m</sup>HCW: health care worker.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-3"><title>Point-of-Care Settings and Operator Characteristics</title><p>Point-of-care settings varied substantially across the 20 studies, reflecting the versatility of AI-assisted imaging (<xref ref-type="table" rid="table2">Table 2</xref>). Primary care facilities predominated (6/20, 30%), followed by community screening programs (4/20, 20%), general hospitals (3/20, 15%), emergency departments (2/20, 10%), and mixed settings (2/20, 10%). Deployments also included mobile diagnostic units [<xref ref-type="bibr" rid="ref18">18</xref>], home care [<xref ref-type="bibr" rid="ref14">14</xref>], and remote rural villages [<xref ref-type="bibr" rid="ref27">27</xref>], demonstrating adaptability to decentralized health care delivery. Geographic settings, where reported, revealed that 25% (5/20) of studies were conducted in rural areas, 5% (1/20) in urban settings, and 70% (14/20) did not specify a geographic context.</p></sec><sec id="s3-4"><title>Task-Shifting Evidence</title><p>A central finding of this review is evidence for task-shifting enabled by AI-assisted imaging (<xref ref-type="fig" rid="figure3">Figure 3</xref>). A total of 13 (13/20, 65%) studies explicitly demonstrated task-shifting from specialists to nonspecialist health care workers, transferring tasks traditionally performed by radiologists, cardiologists, dermatologists, and ophthalmologists to primary care physicians, nurses, community health workers, and medical students. The operator types included primary care physicians (n=4), nurses or community health workers (n=4), health care workers without prior imaging experience (n=4), x-ray technicians (n=3), and medical students (n=2). Further, 7 studies did not specify the primary operator. 
Notably, most task-shifted operators had no prior experience with the specific imaging modality before training.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>AI-enabled task-shifting in point-of-care imaging. The figure illustrates the transfer of diagnostic tasks from specialist physicians (left) to nonspecialist health care workers (right) enabled by AI-assisted point-of-care imaging systems. AI: artificial intelligence; CHW: community health worker.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v5i1e80928_fig03.png"/></fig></sec><sec id="s3-5"><title>Training Requirements</title><p>Training requirements varied across studies reporting training details (13/20, 65%). Among these studies, the duration ranged from 30 minutes to 6 months (median 1 h; n=6 studies reporting specific numeric durations); IQR was not calculated due to the small number of studies and extreme right skew of the distribution. A total of 6 of 13 (46%) studies achieved acceptable diagnostic performance with training programs of 1 hour or less. Training formats included brief PowerPoint (Microsoft Corp) presentations (n=4), app-based training (n=2), in-person demonstrations (n=3), hands-on supervised practice (n=4), and online training sessions (n=2).</p><p>No clear dose-response relationship emerged between training duration and diagnostic performance. Studies with 30&#x2010;60 minutes of training achieved sensitivities of 100%, comparable to or exceeding those with longer programs. Love et al [<xref ref-type="bibr" rid="ref20">20</xref>] reported a medical student achieving 100% sensitivity and specificity for breast mass classification after 30 minutes of PowerPoint training. Avgerinos et al [<xref ref-type="bibr" rid="ref10">10</xref>] demonstrated that nonultrasound-trained providers achieved 100% sensitivity and 95.7% specificity for DVT after 1 hour of app training. 
Similarly, Nothnagel and Aslam [<xref ref-type="bibr" rid="ref24">24</xref>] found that health care professionals without ultrasound training achieved 100% sensitivity and 90.6% specificity for DVT after 1 hour. Berg et al [<xref ref-type="bibr" rid="ref11">11</xref>] reported that research coordinators achieved 86% sensitivity for breast cancer triage after 30 minutes of training, although specificity was lower (33%) with portable devices.</p><p>Among studies where task-shifted performance was explicitly compared to specialists or predefined thresholds, 62% (8/13) demonstrated performance comparable to specialists, 31% (4/13) met predefined acceptable thresholds, and only 8% (1/13) showed performance below acceptable levels.</p></sec><sec id="s3-6"><title>AI System Characteristics</title><p>Having characterized the included studies&#x2019; settings and operators, we next examined the AI systems themselves. The 20 studies evaluated 18 distinct AI systems (<xref ref-type="table" rid="table3">Table 3</xref>): commercial products (12/20, 60%), research prototypes (7/20, 35%), and 1 open-source system (Kankanet). 
Commercial systems included ThinkSono Guidance, Koios DS, EyeArt, MEDO-Hip, Genki, Breast AI, qXR, Dermalyser, and SELENA+.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>AI<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup> system and imaging characteristics.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Author</td><td align="left" valign="bottom">AI system</td><td align="left" valign="bottom">Architecture</td><td align="left" valign="bottom">Commercial status</td><td align="left" valign="bottom">Regulatory</td><td align="left" valign="bottom">Modality</td><td align="left" valign="bottom">Device type</td></tr></thead><tbody><tr><td align="left" valign="top">Avgerinos et al [<xref ref-type="bibr" rid="ref10">10</xref>]</td><td align="left" valign="top">ThinkSono Guidance</td><td align="left" valign="top">NR<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup></td><td align="left" valign="top">Commercial</td><td align="left" valign="top">CE<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup> IIb<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup></td><td align="left" valign="top">POCUS<sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></td><td align="left" valign="top">Handheld</td></tr><tr><td align="left" valign="top">Berg et al [<xref ref-type="bibr" rid="ref11">11</xref>]</td><td align="left" valign="top">Koios DS</td><td align="left" valign="top">NR</td><td align="left" valign="top">Commercial</td><td align="left" valign="top">NR</td><td align="left" valign="top">Breast ultrasound</td><td align="left" valign="top">Portable/Cart</td></tr><tr><td align="left" valign="top">Cao et al [<xref ref-type="bibr" rid="ref12">12</xref>]</td><td align="left" valign="top">JF CXR-1 v3.0</td><td align="left" valign="top">DL<sup><xref ref-type="table-fn" rid="table3fn6">f</xref></sup></td><td align="left" valign="top">Commercial</td><td 
align="left" valign="top">NR</td><td align="left" valign="top">CXR<sup><xref ref-type="table-fn" rid="table3fn7">g</xref></sup></td><td align="left" valign="top">NR</td></tr><tr><td align="left" valign="top">Chen et al [<xref ref-type="bibr" rid="ref13">13</xref>]</td><td align="left" valign="top">EE-Explorer</td><td align="left" valign="top">DenseNet + XGBoost</td><td align="left" valign="top">Research</td><td align="left" valign="top">NR</td><td align="left" valign="top">Photography</td><td align="left" valign="top">Mobile</td></tr><tr><td align="left" valign="top">Fergus et al [<xref ref-type="bibr" rid="ref14">14</xref>]</td><td align="left" valign="top">Faster R-CNN<sup><xref ref-type="table-fn" rid="table3fn8">h</xref></sup></td><td align="left" valign="top">ResNet101</td><td align="left" valign="top">Research</td><td align="left" valign="top">NR</td><td align="left" valign="top">Photography</td><td align="left" valign="top">Mobile</td></tr><tr><td align="left" valign="top">Heydon et al [<xref ref-type="bibr" rid="ref15">15</xref>]</td><td align="left" valign="top">EyeArt v2.1.0</td><td align="left" valign="top">DL</td><td align="left" valign="top">Commercial</td><td align="left" valign="top">CE</td><td align="left" valign="top">Fundus</td><td align="left" valign="top">NR</td></tr><tr><td align="left" valign="top">Iacob et al [<xref ref-type="bibr" rid="ref16">16</xref>]</td><td align="left" valign="top">Wis+</td><td align="left" valign="top">CNN<sup><xref ref-type="table-fn" rid="table3fn8">h</xref></sup></td><td align="left" valign="top">Commercial</td><td align="left" valign="top">NR</td><td align="left" valign="top">Cardiac US</td><td align="left" valign="top">Cart</td></tr><tr><td align="left" valign="top">Jaremko et al [<xref ref-type="bibr" rid="ref17">17</xref>]</td><td align="left" valign="top">MEDO-Hip</td><td align="left" valign="top">UNet CNN</td><td align="left" valign="top">Commercial</td><td align="left" valign="top">FDA<sup><xref 
ref-type="table-fn" rid="table3fn9">i</xref></sup></td><td align="left" valign="top">Hip US</td><td align="left" valign="top">Handheld</td></tr><tr><td align="left" valign="top">Jayaraman et al [<xref ref-type="bibr" rid="ref18">18</xref>]</td><td align="left" valign="top">Genki v1</td><td align="left" valign="top">UNet + Xception</td><td align="left" valign="top">Commercial</td><td align="left" valign="top">NR</td><td align="left" valign="top">CXR</td><td align="left" valign="top">Mobile</td></tr><tr><td align="left" valign="top">Kazemzadeh et al [<xref ref-type="bibr" rid="ref19">19</xref>]</td><td align="left" valign="top">Google tuberculosis AI</td><td align="left" valign="top">NR</td><td align="left" valign="top">Research</td><td align="left" valign="top">NR</td><td align="left" valign="top">CXR</td><td align="left" valign="top">NR</td></tr><tr><td align="left" valign="top">Love et al [<xref ref-type="bibr" rid="ref20">20</xref>]</td><td align="left" valign="top">Triage-CADx</td><td align="left" valign="top">Inception-v3</td><td align="left" valign="top">Research</td><td align="left" valign="top">NR</td><td align="left" valign="top">Breast ultrasound</td><td align="left" valign="top">Portable</td></tr><tr><td align="left" valign="top">Malherbe [<xref ref-type="bibr" rid="ref21">21</xref>]</td><td align="left" valign="top">Breast AI</td><td align="left" valign="top">NR</td><td align="left" valign="top">Commercial</td><td align="left" valign="top">SAHPRA<sup><xref ref-type="table-fn" rid="table3fn10">j</xref></sup></td><td align="left" valign="top">Breast ultrasound</td><td align="left" valign="top">Handheld</td></tr><tr><td align="left" valign="top">Marquez et al [<xref ref-type="bibr" rid="ref22">22</xref>]</td><td align="left" valign="top">qXR v3</td><td align="left" valign="top">NR</td><td align="left" valign="top">Commercial</td><td align="left" valign="top">NR</td><td align="left" valign="top">CXR</td><td align="left" valign="top">NR</td></tr><tr><td 
align="left" valign="top">Nath et al [<xref ref-type="bibr" rid="ref23">23</xref>]</td><td align="left" valign="top">DecXpert v1.4</td><td align="left" valign="top">CNN + attention</td><td align="left" valign="top">Research</td><td align="left" valign="top">NR</td><td align="left" valign="top">CXR</td><td align="left" valign="top">Basic</td></tr><tr><td align="left" valign="top">Nothnagel and Aslam [<xref ref-type="bibr" rid="ref24">24</xref>]</td><td align="left" valign="top">ThinkSono Guidance</td><td align="left" valign="top">U-Net CNN</td><td align="left" valign="top">Commercial</td><td align="left" valign="top">CE I</td><td align="left" valign="top">POCUS</td><td align="left" valign="top">Handheld</td></tr><tr><td align="left" valign="top">Papachristou et al [<xref ref-type="bibr" rid="ref25">25</xref>]</td><td align="left" valign="top">Dermalyser</td><td align="left" valign="top">CNN</td><td align="left" valign="top">Commercial</td><td align="left" valign="top">NR</td><td align="left" valign="top">Dermoscopy</td><td align="left" valign="top">Handheld</td></tr><tr><td align="left" valign="top">Poli et al [<xref ref-type="bibr" rid="ref26">26</xref>]</td><td align="left" valign="top">VIA-AI</td><td align="left" valign="top">EfficientNet</td><td align="left" valign="top">Research</td><td align="left" valign="top">NR</td><td align="left" valign="top">Colposcopy</td><td align="left" valign="top">Mobile</td></tr><tr><td align="left" valign="top">Yang et al [<xref ref-type="bibr" rid="ref27">27</xref>]</td><td align="left" valign="top">Kankanet</td><td align="left" valign="top">SSD + MobileNet</td><td align="left" valign="top">Open-source</td><td align="left" valign="top">NR</td><td align="left" valign="top">Microscopy</td><td align="left" valign="top">Mobile</td></tr><tr><td align="left" valign="top">Yu et al [<xref ref-type="bibr" rid="ref28">28</xref>]</td><td align="left" valign="top">Malaria Screener</td><td align="left" valign="top">DL</td><td align="left" 
valign="top">Research</td><td align="left" valign="top">NR</td><td align="left" valign="top">Microscopy</td><td align="left" valign="top">Mobile</td></tr><tr><td align="left" valign="top">Zhu et al [<xref ref-type="bibr" rid="ref29">29</xref>]</td><td align="left" valign="top">SELENA+</td><td align="left" valign="top">CNN</td><td align="left" valign="top">Commercial</td><td align="left" valign="top">NR</td><td align="left" valign="top">Fundus</td><td align="left" valign="top">Cart</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>AI: artificial intelligence.</p></fn><fn id="table3fn2"><p><sup>b</sup>NR: not reported.</p></fn><fn id="table3fn3"><p><sup>c</sup>CE: conformit&#x00E9; europ&#x00E9;enne.</p></fn><fn id="table3fn4"><p><sup>d</sup>IIb: Class IIb medical device software.</p></fn><fn id="table3fn5"><p><sup>e</sup>POCUS: point-of-care ultrasound.</p></fn><fn id="table3fn6"><p><sup>f</sup>DL: deep learning.</p></fn><fn id="table3fn7"><p><sup>g</sup>CXR: chest x-ray.</p></fn><fn id="table3fn8"><p><sup>h</sup>CNN: convolutional neural network.</p></fn><fn id="table3fn9"><p><sup>i</sup>FDA: Food and Drug Administration.</p></fn><fn id="table3fn10"><p><sup>j</sup>SAHPRA: South African Health Products Regulatory Authority.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-7"><title>Architectures</title><p>Deep learning architectures dominated (18/20, 90%), with 1 hybrid system combining deep learning and gradient boosting (EE-Explorer), and 1 study not reporting its architecture. No studies used traditional ML approaches alone. 
Specific architectures included convolutional neural networks of unspecified type (n=7), U-Net or UNet-like architectures for segmentation tasks (n=3), Inception-v3 for classification (n=2), and individual studies using DenseNet, EfficientNet, ResNet-101, SSD with MobileNet, and Faster R-CNN.</p></sec><sec id="s3-8"><title>Regulatory Status</title><p>Regulatory status was largely unreported, representing a critical gap. Only 25% (5/20) of studies reported any regulatory approval: FDA clearance for MEDO-Hip [<xref ref-type="bibr" rid="ref17">17</xref>], CE (conformit&#x00E9; europ&#x00E9;enne) marking for ThinkSono Guidance (Class IIb [<xref ref-type="bibr" rid="ref10">10</xref>] and Class I [<xref ref-type="bibr" rid="ref24">24</xref>]) and EyeArt [<xref ref-type="bibr" rid="ref15">15</xref>], and South African SAHPRA (South African Health Products Regulatory Authority) approval for Breast AI [<xref ref-type="bibr" rid="ref21">21</xref>]. The remaining 75% (15/20) either did not report their regulatory status or used unapproved systems.</p></sec><sec id="s3-9"><title>Imaging Modalities and Devices</title><p>Imaging modalities reflected in point-of-care contexts included ultrasound (7/20, 35%), chest x-ray (5/20, 25%), smartphone photography (3/20, 15%), fundus photography (2/20, 10%), microscopy (2/20, 10%), and dermoscopy (1/20, 5%). Device types emphasized portability, with mobile phone-based devices (6/20, 30%), handheld devices (5/20, 25%), cart-based devices (3/20, 15%), and portable units (2/20, 10%) being the most prevalent; device type was not reported for 20% (4/20) of studies. Real-time processing was reported in 70% (14/20) of studies, and offline capability&#x2014;critical for settings with unreliable connectivity&#x2014;was confirmed in 30% (6/20) of the studies.</p></sec><sec id="s3-10"><title>Diagnostic Performance</title><sec id="s3-10-1"><title>Overview</title><p>We next assessed the primary outcomes of diagnostic accuracy. 
Diagnostic performance metrics were reported heterogeneously (<xref ref-type="table" rid="table4">Table 4</xref>). Sensitivity, reported in 90% (18/20) of studies, ranged from 62.5% to 100% (median 93.6%, IQR 87%-98%). Specificity, reported in 85% (17/20), ranged from 28.1% to 100% (median 90.6%, IQR 74.5%-96.7%), reflecting a broader variation that is context-dependent and likely due to threshold optimization (<xref ref-type="fig" rid="figure4">Figure 4</xref> [<xref ref-type="bibr" rid="ref10">10</xref>-<xref ref-type="bibr" rid="ref29">29</xref>]). AUC was reported in 50% (10/20) of studies, with a range of 0.63 to 1.00 (median 0.91, IQR 0.82-0.96). PPV ranged from 14% to 92.6% (median 53.4%, IQR 24.3%-86.8%), reflecting the effects of disease prevalence, while NPV was consistently high (median 98.4%, IQR 95.1%-99%), supporting its use for ruling out disease.</p><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Diagnostic performance summary.</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Author</td><td align="left" valign="bottom">Condition</td><td align="left" valign="bottom">Value, N</td><td align="left" valign="bottom">Sensitivity % (95% CI)</td><td align="left" valign="bottom">Specificity % (95% CI)</td><td align="left" valign="bottom">AUC<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup> (95% CI)</td></tr></thead><tbody><tr><td align="left" valign="top">Avgerinos et al [<xref ref-type="bibr" rid="ref10">10</xref>]</td><td align="left" valign="top">DVT<sup><xref ref-type="table-fn" rid="table4fn2">b</xref></sup></td><td align="left" valign="top">53</td><td align="left" valign="top">100 (NR<sup><xref ref-type="table-fn" rid="table4fn3">c</xref></sup>)</td><td align="left" valign="top">95.7 (NR)</td><td align="left" valign="top">NR</td></tr><tr><td align="left" valign="top">Berg et al [<xref ref-type="bibr" rid="ref11">11</xref>]</td><td align="left" 
valign="top">Breast cancer</td><td align="left" valign="top">758</td><td align="left" valign="top">95 (89&#x2010;100)</td><td align="left" valign="top">79 (76&#x2010;82)</td><td align="left" valign="top">0.95 (0.91&#x2010;0.99)</td></tr><tr><td align="left" valign="top">Cao et al [<xref ref-type="bibr" rid="ref12">12</xref>]</td><td align="left" valign="top">Tuberculosis</td><td align="left" valign="top">3705</td><td align="left" valign="top">92.1 (86.0&#x2010;98.2)</td><td align="left" valign="top">94.5 (93.8&#x2010;95.3)</td><td align="left" valign="top">NR</td></tr><tr><td align="left" valign="top">Chen et al [<xref ref-type="bibr" rid="ref13">13</xref>]</td><td align="left" valign="top">Ophthalmic</td><td align="left" valign="top">364</td><td align="left" valign="top">90&#x2010;96.2 (86.4&#x2010;93.6)</td><td align="left" valign="top">NR</td><td align="left" valign="top">0.98 (0.97&#x2010;1.00)</td></tr><tr><td align="left" valign="top">Fergus et al [<xref ref-type="bibr" rid="ref14">14</xref>]</td><td align="left" valign="top">Pressure ulcer</td><td align="left" valign="top">216</td><td align="left" valign="top">70 (NR)</td><td align="left" valign="top">NR</td><td align="left" valign="top">0.63&#x2010;0.93</td></tr><tr><td align="left" valign="top">Heydon et al [<xref ref-type="bibr" rid="ref15">15</xref>]</td><td align="left" valign="top">Referable DR<sup><xref ref-type="table-fn" rid="table4fn4">d</xref></sup></td><td align="left" valign="top">30,405</td><td align="left" valign="top">95.7 (94.8&#x2010;96.5)</td><td align="left" valign="top">54 (53.4&#x2010;54.5)</td><td align="left" valign="top">NR</td></tr><tr><td align="left" valign="top">Iacob et al [<xref ref-type="bibr" rid="ref16">16</xref>]</td><td align="left" valign="top">Cardiac</td><td align="left" valign="top">1780</td><td align="left" valign="top">89.9 (87.2&#x2010;92.2)</td><td align="left" valign="top">96.5 (95.3&#x2010;97.5)</td><td align="left" valign="top">0.94 
(0.92&#x2010;0.96)</td></tr><tr><td align="left" valign="top">Jaremko et al [<xref ref-type="bibr" rid="ref17">17</xref>]</td><td align="left" valign="top">DDH<sup><xref ref-type="table-fn" rid="table4fn5">e</xref></sup></td><td align="left" valign="top">306</td><td align="left" valign="top">NR</td><td align="left" valign="top">100 (NR)</td><td align="left" valign="top">NR</td></tr><tr><td align="left" valign="top">Jayaraman et al [<xref ref-type="bibr" rid="ref18">18</xref>]</td><td align="left" valign="top">Tuberculosis</td><td align="left" valign="top">25,598</td><td align="left" valign="top">98 (97&#x2010;98.8)</td><td align="left" valign="top">96.9 (96.6&#x2010;97.1)</td><td align="left" valign="top">NR</td></tr><tr><td align="left" valign="top">Kazemzadeh et al [<xref ref-type="bibr" rid="ref19">19</xref>]</td><td align="left" valign="top">Tuberculosis</td><td align="left" valign="top">1827</td><td align="left" valign="top">87 (82&#x2010;92)</td><td align="left" valign="top">70 (67&#x2010;72)</td><td align="left" valign="top">0.87 (0.84&#x2010;0.90)</td></tr><tr><td align="left" valign="top">Love et al [<xref ref-type="bibr" rid="ref20">20</xref>]</td><td align="left" valign="top">Breast mass</td><td align="left" valign="top">32</td><td align="left" valign="top">100 (NR)</td><td align="left" valign="top">100 (NR)</td><td align="left" valign="top">1.00</td></tr><tr><td align="left" valign="top">Malherbe [<xref ref-type="bibr" rid="ref21">21</xref>]</td><td align="left" valign="top">Breast cancer</td><td align="left" valign="top">203</td><td align="left" valign="top">NR</td><td align="left" valign="top">NR</td><td align="left" valign="top">NR</td></tr><tr><td align="left" valign="top">Marquez et al [<xref ref-type="bibr" rid="ref22">22</xref>]</td><td align="left" valign="top">Tuberculosis</td><td align="left" valign="top">5740</td><td align="left" valign="top">95.6 (95.1&#x2010;96.1)</td><td align="left" valign="top">28.1 (26.9&#x2010;29.2)</td><td 
align="left" valign="top">0.82 (0.80&#x2010;0.84)</td></tr><tr><td align="left" valign="top">Nath et al [<xref ref-type="bibr" rid="ref23">23</xref>]</td><td align="left" valign="top">Tuberculosis</td><td align="left" valign="top">4363</td><td align="left" valign="top">88 (85&#x2010;93)</td><td align="left" valign="top">85 (82&#x2010;91)</td><td align="left" valign="top">0.85 (0.82&#x2010;0.87)</td></tr><tr><td align="left" valign="top">Nothnagel and Aslam [<xref ref-type="bibr" rid="ref24">24</xref>]</td><td align="left" valign="top">DVT</td><td align="left" valign="top">58</td><td align="left" valign="top">100 (99.1&#x2010;100)</td><td align="left" valign="top">90.6 (90.5&#x2010;91.7)</td><td align="left" valign="top">NR</td></tr><tr><td align="left" valign="top">Papachristou et al [<xref ref-type="bibr" rid="ref25">25</xref>]</td><td align="left" valign="top">Melanoma</td><td align="left" valign="top">253</td><td align="left" valign="top">95.2 (NR)</td><td align="left" valign="top">84.5 (NR)</td><td align="left" valign="top">0.96 (0.93&#x2010;0.98)</td></tr><tr><td align="left" valign="top">Poli et al [<xref ref-type="bibr" rid="ref26">26</xref>]</td><td align="left" valign="top">Cervical</td><td align="left" valign="top">2052</td><td align="left" valign="top">62.5 (51.5&#x2010;72.6)</td><td align="left" valign="top">97.6 (96.8&#x2010;98.2)</td><td align="left" valign="top">0.76</td></tr><tr><td align="left" valign="top">Yang et al [<xref ref-type="bibr" rid="ref27">27</xref>]</td><td align="left" valign="top">Lumbricoides</td><td align="left" valign="top">113</td><td align="left" valign="top">85.7 (NR)</td><td align="left" valign="top">87.5 (NR)</td><td align="left" valign="top">NR</td></tr><tr><td align="left" valign="top">Yu et al [<xref ref-type="bibr" rid="ref28">28</xref>]</td><td align="left" valign="top">Malaria</td><td align="left" valign="top">85&#x2010;189</td><td align="left" valign="top">86.9&#x2010;100 (79-100)</td><td align="left" 
valign="top">51.1&#x2010;91.1 (36-97)</td><td align="left" valign="top">NR</td></tr><tr><td align="left" valign="top">Zhu et al [<xref ref-type="bibr" rid="ref29">29</xref>]</td><td align="left" valign="top">VTDs<sup><xref ref-type="table-fn" rid="table4fn6">f</xref></sup></td><td align="left" valign="top">385</td><td align="left" valign="top">63.2 (47.8&#x2010;78.5)</td><td align="left" valign="top">94.5 (93&#x2010;95.9)</td><td align="left" valign="top">NR</td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>AUC: area under the receiver operating characteristic curve.</p></fn><fn id="table4fn2"><p><sup>b</sup>DVT: deep vein thrombosis.</p></fn><fn id="table4fn3"><p><sup>c</sup>NR: not reported.</p></fn><fn id="table4fn4"><p><sup>d</sup>DR: diabetic retinopathy.</p></fn><fn id="table4fn5"><p><sup>e</sup>DDH: developmental dysplasia of the hip.</p></fn><fn id="table4fn6"><p><sup>f</sup>VTD: vision-threatening disease.</p></fn></table-wrap-foot></table-wrap><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Forest plot of diagnostic accuracy showing sensitivity (left panel) and specificity (right panel) with 95% CIs, grouped by clinical condition. Vertical dashed lines indicate median values (sensitivity 93.6%, specificity 90.6%). Each horizontal dash between panels corresponds to 1 study and serves as a visual guide to help readers track data points across both metrics for the same study. The absence of a data point in the corresponding panel indicates that metrics are missing. 
DVT: deep vein thrombosis [<xref ref-type="bibr" rid="ref10">10</xref>-<xref ref-type="bibr" rid="ref29">29</xref>].</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v5i1e80928_fig04.png"/></fig></sec><sec id="s3-10-2"><title>Performance by Clinical Condition</title><p>Tuberculosis studies (n=5) demonstrated sensitivities ranging from 87% to 98% (median 92.1%, IQR 87.5%-96.8%), specificities ranging from 28% to 97% (median 85%, IQR 49%-95.7%), and AUCs ranging from 0.82 to 0.87. Lower specificity in some studies (Marquez et al [<xref ref-type="bibr" rid="ref22">22</xref>]: 28%) reflected operational decisions aimed at maximizing sensitivity in high-prevalence settings. Both DVT studies achieved 100% sensitivity with specificity ranging from 90.6% to 95.7%, demonstrating excellent performance. Breast imaging studies (n=3) achieved a sensitivity of 95%&#x2010;100%, although specificity varied (79%&#x2010;100%) depending on the operator&#x2019;s training and the type of device used. Diabetic retinopathy studies (n=2) showed more variable performance (sensitivity: 63%&#x2010;96%), reflecting challenges with image quality and the presence of ungradable images.</p></sec><sec id="s3-10-3"><title>Performance by Resource Context</title><p>Analysis by resource context revealed slightly lower performance in LMIC/LIC settings: HIC studies (n=8) achieved a median sensitivity of 95.2% (IQR 70%-100%) and specificity of 94.5% (IQR 84.5%-96.5%), compared with a median sensitivity of 88% (IQR 85.7%-98%) and specificity of 85% (IQR 51.1%-96.9%) in LMIC/LIC studies (n=7). However, these differences may reflect the clinical context rather than inherent limitations of AI. 
Several LMIC studies achieved excellent performance: Jayaraman et al [<xref ref-type="bibr" rid="ref18">18</xref>] (India) reported 98% sensitivity and 97% specificity for tuberculosis, Nath et al [<xref ref-type="bibr" rid="ref23">23</xref>] (India) achieved 88% sensitivity and 85% specificity, and Kazemzadeh et al [<xref ref-type="bibr" rid="ref19">19</xref>] (Zambia) demonstrated 87% sensitivity and 70% specificity.</p></sec><sec id="s3-10-4"><title>AI Versus Comparator Performance</title><p>All 20 studies included some form of comparator (<xref ref-type="table" rid="table5">Table 5</xref>). A total of 8 studies compared AI against the reference standard alone, while 5 directly compared AI to specialist physicians, 3 compared it to nonspecialist physicians or nurses, 2 used standard-of-care comparisons, and 2 used expert panels.</p><table-wrap id="t5" position="float"><label>Table 5.</label><caption><p>AI versus comparator performance<sup><xref ref-type="table-fn" rid="table5fn1">a</xref></sup>.</p></caption><table id="table5" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Author</td><td align="left" valign="bottom">Comparator</td><td align="left" valign="bottom">Metrics</td><td align="left" valign="bottom">AI, n/N (%)</td><td align="left" valign="bottom">Comparator, n/N (%)</td><td align="left" valign="bottom">Statistical test</td><td align="left" valign="bottom">Result</td></tr></thead><tbody><tr><td align="left" valign="top">Avgerinos et al [<xref ref-type="bibr" rid="ref10">10</xref>]</td><td align="left" valign="top">D-dimer</td><td align="left" valign="top">Sensitivity; specificity</td><td align="left" valign="top">6/6 (100); 45/47 (95.7)</td><td align="left" valign="top">6/6 (100); 17/47 (36.2)</td><td align="left" valign="top">NR</td><td align="left" valign="top">AI superior (specificity)</td></tr><tr><td align="left" valign="top">Berg et al [<xref ref-type="bibr" rid="ref11">11</xref>]</td><td align="left" 
valign="top">Radiologist</td><td align="left" valign="top">Sensitivity; specificity</td><td align="left" valign="top">53/56 (95); 554/702 (79)</td><td align="left" valign="top">56/56 (100); 612/702 (87)</td><td align="left" valign="top">DeLong</td><td align="left" valign="top">Noninferior (<italic>P</italic>=.10)</td></tr><tr><td align="left" valign="top">Cao et al [<xref ref-type="bibr" rid="ref12">12</xref>]</td><td align="left" valign="top">Local radiologists</td><td align="left" valign="top">Sensitivity; specificity</td><td align="left" valign="top">70/76 (92.1); 3431/3629 (94.5)</td><td align="left" valign="top">25/76 (32.9); 3604/3629 (99.3)</td><td align="left" valign="top">McNemar</td><td align="left" valign="top">AI superior (sensitivity; <italic>P</italic>&#x003C;.05)<sup><xref ref-type="table-fn" rid="table5fn2">b</xref></sup></td></tr><tr><td align="left" valign="top">Chen et al [<xref ref-type="bibr" rid="ref13">13</xref>]</td><td align="left" valign="top">Triage nurses</td><td align="left" valign="top">Accuracy</td><td align="left" valign="top">246/261 (94.3)</td><td align="left" valign="top">215/261 (82.4)</td><td align="left" valign="top">McNemar</td><td align="left" valign="top">AI superior (<italic>P</italic>&#x003C;.001)</td></tr><tr><td align="left" valign="top">Kazemzadeh et al [<xref ref-type="bibr" rid="ref19">19</xref>]</td><td align="left" valign="top">10 radiologists</td><td align="left" valign="top">Sensitivity; specificity</td><td align="left" valign="top">167/192 (87); 1145/1635 (70)</td><td align="left" valign="top">76<sup><xref ref-type="table-fn" rid="table5fn3">c</xref></sup>; 82<sup><xref ref-type="table-fn" rid="table5fn3">c</xref></sup></td><td align="left" valign="top">Obuchowski-Rockette-Hillis procedure</td><td align="left" valign="top">AI superior (sens; <italic>P</italic>&#x003C;.001)</td></tr><tr><td align="left" valign="top">Nath et al [<xref ref-type="bibr" rid="ref23">23</xref>]</td><td align="left" valign="top">3 
radiologists</td><td align="left" valign="top">Sensitivity; specificity</td><td align="left" valign="top">2064/2345 (88); 1715/2018 (85)</td><td align="left" valign="top">1665/2345 (71, PPV<sup><xref ref-type="table-fn" rid="table5fn4">d</xref></sup>); NR<sup><xref ref-type="table-fn" rid="table5fn5">e</xref></sup> (84, NPV<sup><xref ref-type="table-fn" rid="table5fn6">f</xref></sup>)</td><td align="left" valign="top">NR</td><td align="left" valign="top">AI superior (sensitivity)<sup><xref ref-type="table-fn" rid="table5fn7">g</xref></sup></td></tr><tr><td align="left" valign="top">Papachristou et al [<xref ref-type="bibr" rid="ref25">25</xref>]</td><td align="left" valign="top">PCPs<sup><xref ref-type="table-fn" rid="table5fn8">h</xref></sup></td><td align="left" valign="top">PPV</td><td align="left" valign="top">20/56 (36)</td><td align="left" valign="top">12/51 (24)</td><td align="left" valign="top">Logistic regression</td><td align="left" valign="top">AI superior (OR<sup><xref ref-type="table-fn" rid="table5fn9">i</xref></sup>=26.55, <italic>P</italic>=.002)</td></tr></tbody></table><table-wrap-foot><fn id="table5fn1"><p><sup>a</sup>The table includes only studies with direct head-to-head comparison data. All 20 studies had some form of comparator (reference standard or human comparator).</p></fn><fn id="table5fn2"><p><sup>b</sup>Exact <italic>P</italic> value not reported in source; <italic>P</italic>&#x003C;.05 indicates statistical significance threshold met.</p></fn><fn id="table5fn3"><p><sup>c</sup>Radiologist performance represents the mean across 10 independent readers; absolute values cannot be expressed as a single numerator/denominator.</p></fn><fn id="table5fn4"><p><sup>d</sup>PPV: positive predictive value.</p></fn><fn id="table5fn5"><p><sup>e</sup>NR: not reported. 
Absolute values for comparator negative predictive value could not be reliably derived.</p></fn><fn id="table5fn6"><p><sup>f</sup>NPV: negative predictive value.</p></fn><fn id="table5fn7"><p><sup>g</sup>Direct metric comparison limited; comparator study reported positive predictive value or negative predictive value rather than sensitivity or specificity.</p></fn><fn id="table5fn8"><p><sup>h</sup>PCP: primary care physician.</p></fn><fn id="table5fn9"><p><sup>i</sup>OR: odds ratio.</p></fn></table-wrap-foot></table-wrap><p>Among studies with head-to-head comparisons, AI demonstrated superior performance in most cases. Cao et al [<xref ref-type="bibr" rid="ref12">12</xref>] found AI sensitivity (92.1%) significantly exceeded that of local radiologists (32.9%) for tuberculosis detection (McNemar test, <italic>P</italic>&#x003C;.05; exact value not reported). Kazemzadeh et al [<xref ref-type="bibr" rid="ref19">19</xref>] reported that AI achieved higher sensitivity (87%) than 10 experienced radiologists (mean 76%) for tuberculosis screening (<italic>P</italic>&#x003C;.001). Chen et al [<xref ref-type="bibr" rid="ref13">13</xref>] demonstrated that AI accuracy (94.3%) significantly exceeded that of triage nurses (82.4%) for ophthalmic emergencies (<italic>P</italic>&#x003C;.001). Papachristou et al [<xref ref-type="bibr" rid="ref25">25</xref>] showed AI substantially outperformed primary care physicians for melanoma detection (OR 26.55, <italic>P</italic>=.002 vs OR 3.35, <italic>P</italic>=.02). 
A total of 2 studies demonstrated equivalence: Berg et al [<xref ref-type="bibr" rid="ref11">11</xref>] found AI performance with standard ultrasound (AUC 0.95) was noninferior to expert radiologists (AUC 0.98, <italic>P</italic>=.10), and Avgerinos et al [<xref ref-type="bibr" rid="ref10">10</xref>] showed AI-assisted point-of-care ultrasound matched D-dimer sensitivity (100%) while achieving substantially higher specificity (95.7% vs 36.2%).</p><p>Notably, Berg et al [<xref ref-type="bibr" rid="ref11">11</xref>] demonstrated that AI performance decreased when images were acquired by minimally trained operators using portable devices rather than specialists using standard equipment, highlighting the importance of image quality for AI performance.</p></sec></sec><sec id="s3-11"><title>Explainability (XAI) Assessment</title><p>Beyond diagnostic accuracy, we examined the implementation of explainability features using a proposed cascade framework (<xref ref-type="table" rid="table6">Table 6</xref>), assessing progression from no mention through clinical evaluation of decision impact. The cascade revealed a progressive drop-off (<xref ref-type="fig" rid="figure5">Figure 5</xref>): 75% (15/20) of studies did not mention explainability at all (level 0). A total of 3 studies (3/20, 15%) implemented XAI during development but did not provide explanations to clinical users (level 1): Chen et al [<xref ref-type="bibr" rid="ref13">13</xref>] used Grad-CAM (Gradient-weighted Class Activation Mapping) and SHAP (Shapley Additive Explanation) values, Nath et al [<xref ref-type="bibr" rid="ref23">23</xref>] used heatmaps and attention mechanisms, and Poli et al [<xref ref-type="bibr" rid="ref26">26</xref>] used Grad-CAM. 
Only 2 of 20 (10%) studies provided explanations to clinical users (level 2): Jayaraman et al [<xref ref-type="bibr" rid="ref18">18</xref>] displayed heatmaps to radiologists, and Yang et al [<xref ref-type="bibr" rid="ref27">27</xref>] showed bounding boxes with confidence scores to health care workers. Critically, no studies assessed whether clinicians understood the explanations (level 3) or evaluated whether XAI influenced clinical decisions (levels 4&#x2010;5), representing a complete drop-off above level 2.</p><p>Among studies mentioning XAI, the techniques included Grad-CAM activation maps (n=2), heatmaps (n=2), attention mechanisms (n=2), bounding boxes with confidence scores (n=1), and SHAP values (n=1); no studies implemented Local Interpretable Model-Agnostic Explanations.</p><table-wrap id="t6" position="float"><label>Table 6.</label><caption><p>XAI<sup><xref ref-type="table-fn" rid="table6fn1">a</xref></sup> assessment. Cascade level: 0=not mentioned, 1=implemented only, 2=shown to users, 3=understanding assessed, 4=decision impact evaluated, 5=full evaluation.</p></caption><table id="table6" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Author</td><td align="left" valign="bottom">XAI mentioned</td><td align="left" valign="bottom">Technique</td><td align="left" valign="bottom">Shown to users</td><td align="left" valign="bottom">Understanding assessed</td><td align="left" valign="bottom">Decision impact</td><td align="left" valign="bottom">Cascade level</td></tr></thead><tbody><tr><td align="left" valign="top">Avgerinos et al [<xref ref-type="bibr" rid="ref10">10</xref>]</td><td align="left" valign="top">No</td><td align="left" valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table6fn2">b</xref></sup></td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top">Berg et al [<xref 
ref-type="bibr" rid="ref11">11</xref>]</td><td align="left" valign="top">No</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top">Cao et al [<xref ref-type="bibr" rid="ref12">12</xref>]</td><td align="left" valign="top">No</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top">Chen et al [<xref ref-type="bibr" rid="ref13">13</xref>]</td><td align="left" valign="top">Yes</td><td align="left" valign="top">GC<sup><xref ref-type="table-fn" rid="table6fn3">c</xref></sup>, SHAP<sup><xref ref-type="table-fn" rid="table6fn4">d</xref></sup></td><td align="left" valign="top">NR<sup><xref ref-type="table-fn" rid="table6fn5">e</xref></sup></td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">1</td></tr><tr><td align="left" valign="top">Fergus et al [<xref ref-type="bibr" rid="ref14">14</xref>]</td><td align="left" valign="top">No</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top">Heydon et al [<xref ref-type="bibr" rid="ref15">15</xref>]</td><td align="left" valign="top">No</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top">Iacob et al [<xref ref-type="bibr" rid="ref16">16</xref>]</td><td align="left" valign="top">No</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">No</td><td align="left" 
valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top">Jaremko et al [<xref ref-type="bibr" rid="ref17">17</xref>]</td><td align="left" valign="top">No</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top">Jayaraman et al [<xref ref-type="bibr" rid="ref18">18</xref>]</td><td align="left" valign="top">Yes</td><td align="left" valign="top">Heatmap, ATT<sup><xref ref-type="table-fn" rid="table6fn6">f</xref></sup></td><td align="left" valign="top">Yes</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">2</td></tr><tr><td align="left" valign="top">Kazemzadeh et al [<xref ref-type="bibr" rid="ref19">19</xref>]</td><td align="left" valign="top">No</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top">Love et al [<xref ref-type="bibr" rid="ref20">20</xref>]</td><td align="left" valign="top">No</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top">Malherbe [<xref ref-type="bibr" rid="ref21">21</xref>]</td><td align="left" valign="top">No</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top">Marquez et al [<xref ref-type="bibr" rid="ref22">22</xref>]</td><td align="left" valign="top">No</td><td align="left" valign="top">&#x2014;</td><td 
align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top">Nath et al [<xref ref-type="bibr" rid="ref23">23</xref>]</td><td align="left" valign="top">Yes</td><td align="left" valign="top">Heatmap, ATT</td><td align="left" valign="top">NR</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">1</td></tr><tr><td align="left" valign="top">Nothnagel and Aslam [<xref ref-type="bibr" rid="ref24">24</xref>]</td><td align="left" valign="top">No</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top">Papachristou et al [<xref ref-type="bibr" rid="ref25">25</xref>]</td><td align="left" valign="top">No</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top">Poli et al [<xref ref-type="bibr" rid="ref26">26</xref>]</td><td align="left" valign="top">Yes</td><td align="left" valign="top">GC</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">1</td></tr><tr><td align="left" valign="top">Yang et al [<xref ref-type="bibr" rid="ref27">27</xref>]</td><td align="left" valign="top">Yes</td><td align="left" valign="top">BB<sup><xref ref-type="table-fn" rid="table6fn7">g</xref></sup>, Conf<sup><xref ref-type="table-fn" rid="table6fn8">h</xref></sup></td><td align="left" valign="top">Yes</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">2</td></tr><tr><td align="left" valign="top">Yu et al [<xref ref-type="bibr" 
rid="ref28">28</xref>]</td><td align="left" valign="top">No</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top">Zhu et al [<xref ref-type="bibr" rid="ref29">29</xref>]</td><td align="left" valign="top">No</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">0</td></tr></tbody></table><table-wrap-foot><fn id="table6fn1"><p><sup>a</sup>XAI: explainable artificial intelligence.</p></fn><fn id="table6fn2"><p><sup>b</sup>Not available.</p></fn><fn id="table6fn3"><p><sup>c</sup>GC: Gradient-weighted Class Activation Mapping.</p></fn><fn id="table6fn4"><p><sup>d</sup>SHAP: Shapley Additive Explanations.</p></fn><fn id="table6fn5"><p><sup>e</sup>NR: not reported.</p></fn><fn id="table6fn6"><p><sup>f</sup>ATT: attention mechanism.</p></fn><fn id="table6fn7"><p><sup>g</sup>BB: bounding box.</p></fn><fn id="table6fn8"><p><sup>h</sup>Conf: confidence score.</p></fn></table-wrap-foot></table-wrap><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>Illustrating the distribution of studies across explainability implementation levels.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v5i1e80928_fig05.png"/></fig></sec><sec id="s3-12"><title>Clinical Outcomes Beyond Diagnostic Accuracy</title><p>A second critical finding is the evidence gap between technical validation and clinical impact. 
Applying the clinical impact pyramid framework (<xref ref-type="table" rid="table7">Table 7</xref>), the distribution revealed (<xref ref-type="fig" rid="figure6">Figure 6</xref>): 15% (3/20) of studies reported technical accuracy only (level 0: Kazemzadeh et al [<xref ref-type="bibr" rid="ref19">19</xref>], Love et al [<xref ref-type="bibr" rid="ref20">20</xref>], Yang et al [<xref ref-type="bibr" rid="ref27">27</xref>]); 65% (13/20) reported process outcomes such as time to diagnosis and workflow efficiency (level 1); and 20% (4/20) documented clinical actions based on AI recommendations (level 2: Chen et al [<xref ref-type="bibr" rid="ref13">13</xref>], Iacob et al [<xref ref-type="bibr" rid="ref16">16</xref>], Jaremko et al [<xref ref-type="bibr" rid="ref17">17</xref>], and Malherbe [<xref ref-type="bibr" rid="ref21">21</xref>]). Critically, no studies measured patient-level health outcomes (level 3) or health system impact (levels 4&#x2010;5). Specific outcomes reported included workflow efficiency (10/20, 50%), time to diagnosis (9/20, 45%; AI processing typically &#x003C;1 min), referral documentation (6/20, 30%), and cost analysis (5/20, 25%; projection-based rather than measured savings). The absence of patient outcome data is notable, given that several studies enrolled thousands of patients in operational settings (Heydon et al, n=30,405 [<xref ref-type="bibr" rid="ref15">15</xref>]; Jayaraman et al, n=25,598 [<xref ref-type="bibr" rid="ref18">18</xref>]; Marquez et al, n=5740 [<xref ref-type="bibr" rid="ref22">22</xref>]), where outcome tracking was feasible.</p><table-wrap id="t7" position="float"><label>Table 7.</label><caption><p>Clinical outcomes beyond diagnostic accuracy. 
Impact level: 0=technical accuracy only, 1=process outcomes, 2=clinical actions documented, 3=patient outcomes measured, 4=health system impact, and 5=population health outcomes.</p></caption><table id="table7" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Author</td><td align="left" valign="bottom">Workflow outcomes</td><td align="left" valign="bottom">Referrals</td><td align="left" valign="bottom">Time impact</td><td align="left" valign="bottom">Cost analysis</td><td align="left" valign="bottom">Patient outcomes</td><td align="left" valign="bottom">Impact level</td></tr></thead><tbody><tr><td align="left" valign="top">Avgerinos et al [<xref ref-type="bibr" rid="ref10">10</xref>]</td><td align="left" valign="top">Yes</td><td align="left" valign="top">17/53 (32%) discharged</td><td align="left" valign="top">Yes (37 min)</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">1</td></tr><tr><td align="left" valign="top">Berg et al [<xref ref-type="bibr" rid="ref11">11</xref>]</td><td align="left" valign="top">Yes</td><td align="left" valign="top">NR<sup><xref ref-type="table-fn" rid="table7fn1">a</xref></sup></td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">1</td></tr><tr><td align="left" valign="top">Cao et al [<xref ref-type="bibr" rid="ref12">12</xref>]</td><td align="left" valign="top">No</td><td align="left" valign="top">Yes</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">1</td></tr><tr><td align="left" valign="top">Chen et al [<xref ref-type="bibr" rid="ref13">13</xref>]</td><td align="left" valign="top">Yes</td><td align="left" valign="top">Yes</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" 
valign="top">2</td></tr><tr><td align="left" valign="top">Fergus et al [<xref ref-type="bibr" rid="ref14">14</xref>]</td><td align="left" valign="top">Yes</td><td align="left" valign="top">NR</td><td align="left" valign="top">Yes (2&#x2010;3 s)</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">1</td></tr><tr><td align="left" valign="top">Heydon et al [<xref ref-type="bibr" rid="ref15">15</xref>]</td><td align="left" valign="top">Yes</td><td align="left" valign="top">NR</td><td align="left" valign="top">No</td><td align="left" valign="top">Yes</td><td align="left" valign="top">No</td><td align="left" valign="top">1</td></tr><tr><td align="left" valign="top">Iacob et al [<xref ref-type="bibr" rid="ref16">16</xref>]</td><td align="left" valign="top">Yes</td><td align="left" valign="top">Yes</td><td align="left" valign="top">No</td><td align="left" valign="top">Yes</td><td align="left" valign="top">No</td><td align="left" valign="top">2</td></tr><tr><td align="left" valign="top">Jaremko et al [<xref ref-type="bibr" rid="ref17">17</xref>]</td><td align="left" valign="top">Yes</td><td align="left" valign="top">Yes</td><td align="left" valign="top">Yes (&#x003C;1 min)</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">2</td></tr><tr><td align="left" valign="top">Jayaraman et al [<xref ref-type="bibr" rid="ref18">18</xref>]</td><td align="left" valign="top">Yes</td><td align="left" valign="top">Yes</td><td align="left" valign="top">Yes (&#x003C;1 min)</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">1</td></tr><tr><td align="left" valign="top">Kazemzadeh et al [<xref ref-type="bibr" rid="ref19">19</xref>]</td><td align="left" valign="top">No</td><td align="left" valign="top">NR</td><td align="left" valign="top">Yes (&#x003C;1 min)</td><td align="left" valign="top">No</td><td align="left" 
valign="top">No</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top">Love et al [<xref ref-type="bibr" rid="ref20">20</xref>]</td><td align="left" valign="top">No</td><td align="left" valign="top">NR</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top">Malherbe [<xref ref-type="bibr" rid="ref21">21</xref>]</td><td align="left" valign="top">Yes</td><td align="left" valign="top">NR</td><td align="left" valign="top">Yes</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">2</td></tr><tr><td align="left" valign="top">Marquez et al [<xref ref-type="bibr" rid="ref22">22</xref>]</td><td align="left" valign="top">Yes</td><td align="left" valign="top">NR</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">1</td></tr><tr><td align="left" valign="top">Nath et al [<xref ref-type="bibr" rid="ref23">23</xref>]</td><td align="left" valign="top">Yes</td><td align="left" valign="top">NR</td><td align="left" valign="top">Yes (&#x003C;1 min)</td><td align="left" valign="top">Yes</td><td align="left" valign="top">No</td><td align="left" valign="top">1</td></tr><tr><td align="left" valign="top">Nothnagel and Aslam [<xref ref-type="bibr" rid="ref24">24</xref>]</td><td align="left" valign="top">Yes</td><td align="left" valign="top">NR</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">1</td></tr><tr><td align="left" valign="top">Papachristou et al [<xref ref-type="bibr" rid="ref25">25</xref>]</td><td align="left" valign="top">Yes</td><td align="left" valign="top">NR</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td 
align="left" valign="top">1</td></tr><tr><td align="left" valign="top">Poli et al [<xref ref-type="bibr" rid="ref26">26</xref>]</td><td align="left" valign="top">Yes</td><td align="left" valign="top">NR</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">1</td></tr><tr><td align="left" valign="top">Yang et al [<xref ref-type="bibr" rid="ref27">27</xref>]</td><td align="left" valign="top">Yes</td><td align="left" valign="top">NR</td><td align="left" valign="top">No</td><td align="left" valign="top">Yes</td><td align="left" valign="top">No</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top">Yu et al [<xref ref-type="bibr" rid="ref28">28</xref>]</td><td align="left" valign="top">Yes</td><td align="left" valign="top">NR</td><td align="left" valign="top">Yes</td><td align="left" valign="top">Yes</td><td align="left" valign="top">No</td><td align="left" valign="top">1</td></tr><tr><td align="left" valign="top">Zhu et al [<xref ref-type="bibr" rid="ref29">29</xref>]</td><td align="left" valign="top">Yes</td><td align="left" valign="top">Yes</td><td align="left" valign="top">Yes (35.6 s)</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">1</td></tr></tbody></table><table-wrap-foot><fn id="table7fn1"><p><sup>a</sup>NR: not reported.</p></fn></table-wrap-foot></table-wrap><fig position="float" id="figure6"><label>Figure 6.</label><caption><p>Distribution of evidence across clinical impact levels. The pyramid represents evidence maturity from technical validation (base) toward demonstration of patient benefit (apex).</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v5i1e80928_fig06.png"/></fig></sec><sec id="s3-13"><title>Methodological Quality</title><p>We assessed methodological quality using the QUADAS-2 tool. 
Assessment revealed substantial methodological concerns (<xref ref-type="table" rid="table8">Table 8</xref>; <xref ref-type="fig" rid="figure7">Figures 7</xref> and <xref ref-type="fig" rid="figure8">8</xref> [<xref ref-type="bibr" rid="ref10">10</xref>-<xref ref-type="bibr" rid="ref29">29</xref>]; detailed justifications for each judgment are provided in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>). Thus, 70% (14/20) of the evidence base carries high or very high RoB, substantially limiting confidence in reported performance estimates.</p><table-wrap id="t8" position="float"><label>Table 8.</label><caption><p>QUADAS-2<sup><xref ref-type="table-fn" rid="table8fn1">a</xref></sup> RoB<sup><xref ref-type="table-fn" rid="table8fn2">b</xref></sup> summary.</p></caption><table id="table8" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Author</td><td align="left" valign="bottom">D1<sup><xref ref-type="table-fn" rid="table8fn3">c</xref></sup> RoB</td><td align="left" valign="bottom">D2<sup><xref ref-type="table-fn" rid="table8fn4">d</xref></sup> RoB</td><td align="left" valign="bottom">D3<sup><xref ref-type="table-fn" rid="table8fn5">e</xref></sup> RoB</td><td align="left" valign="bottom">D4<sup><xref ref-type="table-fn" rid="table8fn6">f</xref></sup> RoB</td><td align="left" valign="bottom">D1 app<sup><xref ref-type="table-fn" rid="table8fn7">g</xref></sup></td><td align="left" valign="bottom">D2 app</td><td align="left" valign="bottom">D3 app</td><td align="left" valign="bottom">Overall</td></tr></thead><tbody><tr><td align="left" valign="top">Avgerinos et al [<xref ref-type="bibr" rid="ref10">10</xref>]</td><td align="left" valign="top">U<sup><xref ref-type="table-fn" rid="table8fn8">h</xref></sup></td><td align="left" valign="top">L<sup><xref ref-type="table-fn" rid="table8fn9">i</xref></sup></td><td align="left" valign="top">H<sup><xref ref-type="table-fn" rid="table8fn10">j</xref></sup></td><td 
align="left" valign="top">H</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">High</td></tr><tr><td align="left" valign="top">Berg et al [<xref ref-type="bibr" rid="ref11">11</xref>]</td><td align="left" valign="top">H</td><td align="left" valign="top">L</td><td align="left" valign="top">H</td><td align="left" valign="top">H</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">Very high</td></tr><tr><td align="left" valign="top">Cao et al [<xref ref-type="bibr" rid="ref12">12</xref>]</td><td align="left" valign="top">H</td><td align="left" valign="top">L</td><td align="left" valign="top">H</td><td align="left" valign="top">H</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">H</td><td align="left" valign="top">Very high</td></tr><tr><td align="left" valign="top">Chen et al [<xref ref-type="bibr" rid="ref13">13</xref>]</td><td align="left" valign="top">U</td><td align="left" valign="top">H</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">Moderate</td></tr><tr><td align="left" valign="top">Fergus et al [<xref ref-type="bibr" rid="ref14">14</xref>]</td><td align="left" valign="top">U</td><td align="left" valign="top">H</td><td align="left" valign="top">H</td><td align="left" valign="top">H</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">Very high</td></tr><tr><td align="left" valign="top">Heydon et al [<xref ref-type="bibr" rid="ref15">15</xref>]</td><td align="left" valign="top">L</td><td align="left" valign="top">U</td><td align="left" valign="top">L</td><td align="left" 
valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">Low</td></tr><tr><td align="left" valign="top">Iacob et al [<xref ref-type="bibr" rid="ref16">16</xref>]</td><td align="left" valign="top">U</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">Low-moderate</td></tr><tr><td align="left" valign="top">Jaremko et al [<xref ref-type="bibr" rid="ref17">17</xref>]</td><td align="left" valign="top">U</td><td align="left" valign="top">L</td><td align="left" valign="top">H</td><td align="left" valign="top">H</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">High</td></tr><tr><td align="left" valign="top">Jayaraman et al [<xref ref-type="bibr" rid="ref18">18</xref>]</td><td align="left" valign="top">U</td><td align="left" valign="top">U</td><td align="left" valign="top">H</td><td align="left" valign="top">U</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">H</td><td align="left" valign="top">High</td></tr><tr><td align="left" valign="top">Kazemzadeh et al [<xref ref-type="bibr" rid="ref19">19</xref>]</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">Low</td></tr><tr><td align="left" valign="top">Love et al [<xref ref-type="bibr" rid="ref20">20</xref>]</td><td align="left" valign="top">U</td><td align="left" valign="top">U</td><td align="left" valign="top">H</td><td align="left" valign="top">H</td><td 
align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">U</td><td align="left" valign="top">High</td></tr><tr><td align="left" valign="top">Malherbe [<xref ref-type="bibr" rid="ref21">21</xref>]</td><td align="left" valign="top">H</td><td align="left" valign="top">U</td><td align="left" valign="top">H</td><td align="left" valign="top">H</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">H</td><td align="left" valign="top">Very high</td></tr><tr><td align="left" valign="top">Marquez et al [<xref ref-type="bibr" rid="ref22">22</xref>]</td><td align="left" valign="top">H</td><td align="left" valign="top">L</td><td align="left" valign="top">H</td><td align="left" valign="top">H</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">High</td></tr><tr><td align="left" valign="top">Nath et al [<xref ref-type="bibr" rid="ref23">23</xref>]</td><td align="left" valign="top">U</td><td align="left" valign="top">U</td><td align="left" valign="top">L</td><td align="left" valign="top">U</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">Moderate</td></tr><tr><td align="left" valign="top">Nothnagel and Aslam [<xref ref-type="bibr" rid="ref24">24</xref>]</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">H</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">Moderate</td></tr><tr><td align="left" valign="top">Papachristou et al [<xref ref-type="bibr" rid="ref25">25</xref>]</td><td align="left" valign="top">U</td><td align="left" valign="top">L</td><td align="left" valign="top">H</td><td align="left" valign="top">H</td><td align="left" 
valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">High</td></tr><tr><td align="left" valign="top">Poli et al [<xref ref-type="bibr" rid="ref26">26</xref>]</td><td align="left" valign="top">U</td><td align="left" valign="top">L</td><td align="left" valign="top">H</td><td align="left" valign="top">H</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">High</td></tr><tr><td align="left" valign="top">Yang et al [<xref ref-type="bibr" rid="ref27">27</xref>]</td><td align="left" valign="top">H</td><td align="left" valign="top">U</td><td align="left" valign="top">U</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">High</td></tr><tr><td align="left" valign="top">Yu et al [<xref ref-type="bibr" rid="ref28">28</xref>]</td><td align="left" valign="top">H</td><td align="left" valign="top">H</td><td align="left" valign="top">L</td><td align="left" valign="top">H</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">High</td></tr><tr><td align="left" valign="top">Zhu et al [<xref ref-type="bibr" rid="ref29">29</xref>]</td><td align="left" valign="top">H</td><td align="left" valign="top">U</td><td align="left" valign="top">U</td><td align="left" valign="top">U</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">L</td><td align="left" valign="top">High</td></tr></tbody></table><table-wrap-foot><fn id="table8fn1"><p><sup>a</sup>QUADAS-2: Quality Assessment of Diagnostic Accuracy Studies 2.</p></fn><fn id="table8fn2"><p><sup>b</sup>RoB: risk of bias.</p></fn><fn id="table8fn3"><p><sup>c</sup>D1: patient selection.</p></fn><fn id="table8fn4"><p><sup>d</sup>D2: 
index test.</p></fn><fn id="table8fn5"><p><sup>e</sup>D3: reference standard.</p></fn><fn id="table8fn6"><p><sup>f</sup>D4: flow and timing.</p></fn><fn id="table8fn7"><p><sup>g</sup>App: applicability concerns.</p></fn><fn id="table8fn8"><p><sup>h</sup>U: unclear risk.</p></fn><fn id="table8fn9"><p><sup>i</sup>L: low risk.</p></fn><fn id="table8fn10"><p><sup>j</sup>H: high risk.</p></fn></table-wrap-foot></table-wrap><fig position="float" id="figure7"><label>Figure 7.</label><caption><p>Summarizing the risk of bias and applicability concerns across all QUADAS-2 domains. QUADAS-2: Quality Assessment of Diagnostic Accuracy Studies 2 [<xref ref-type="bibr" rid="ref10">10</xref>-<xref ref-type="bibr" rid="ref29">29</xref>].</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v5i1e80928_fig07.png"/></fig><fig position="float" id="figure8"><label>Figure 8.</label><caption><p>Summarizing the proportion of studies at low, unclear, and high risk of bias across all QUADAS-2 domains. QUADAS-2: Quality Assessment of Diagnostic Accuracy Studies 2.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v5i1e80928_fig08.png"/></fig><p>Domain-level assessment revealed that the index test domain (domain 2) was the strongest, with 50% (10/20) of studies classified as being at low risk, reflecting reduced bias from automated AI interpretation. Patient selection (domain 1) revealed a 50% (10/20) unclear risk due to inadequate enrollment reporting, with high-risk studies relying on convenience sampling or case-control designs. The reference standard (domain 3) indicated a 55% (11/20) high risk, primarily due to differential verification bias; notably, Malherbe [<xref ref-type="bibr" rid="ref21">21</xref>] did not have a reference standard. 
Flow and timing (domain 4) showed a 60% (12/20) high-risk rating, reflecting substantial exclusions and inadequate patient accounting.</p><p>Applicability concerns were generally low across all domains: patient selection 100% (20/20), index test 100% (20/20), and reference standard 80% (16/20) low concern, suggesting that, despite concerns about RoB, the studied populations and AI systems are relevant to the review question.</p><p>The most common methodological issues were verification bias (14/20, 70%) of studies, where only positive cases received the reference standard or substantial proportions were unverified; selection bias (10/20, 50%), involving nonconsecutive or convenience sampling; post hoc threshold optimization (6/20, 30%), potentially inflating performance estimates; and high exclusion rates (6/20, 30%), with some studies excluding over 20% of enrolled patients.</p></sec><sec id="s3-14"><title>Sensitivity Analysis</title><p>Given that 70% (14/20) of studies were rated as having a high or very high RoB, we conducted a sensitivity analysis restricted to studies with a low-to-moderate RoB (6/20, 30%; detailed in <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>). Sensitivity ranges narrowed from 62.5%-100% to 87%-100% (median 93.6%, IQR 87%-98% vs 90%, IQR 88%-95.7%), and specificity ranges narrowed from 28.1%-100% to 54%-96.5% (median 90.6%, IQR 74.5%-96.7% vs 85%, IQR 62%-93.5%). Median AUC remained stable at 0.91 (IQR 0.82-0.96) for the full cohort versus 0.91 (IQR 0.86-0.96) for the quality-restricted subset. 
These narrower ranges reflected elimination of outliers rather than systematic inflation in lower-quality studies, suggesting primary diagnostic accuracy conclusions are robust to quality concerns.</p></sec><sec id="s3-15"><title>Reporting Bias Assessment</title><p>A qualitative assessment of reporting bias revealed several limitations (<xref ref-type="supplementary-material" rid="app6">Multimedia Appendix 6</xref>). The comprehensiveness of the literature search was moderate: while 4 major databases (PubMed, Scopus, IEEE Xplore, and Web of Science) were systematically searched, gray literature, conference proceedings, preprint servers, and trial registries were not included, potentially missing unpublished studies with null or negative findings. Language restrictions to English-only studies may have excluded relevant evidence from non-English sources.</p><p>Publication bias indicators suggested moderate concern. Commercial AI systems predominated (12/20, 60%), potentially introducing bias in favor of favorable results from vendor-sponsored research. Small-study effects were evident: small studies (n&#x003C;200, 5/20, 25%) demonstrated narrower and higher performance ranges (sensitivity 85.7%&#x2010;100%, specificity 71.1%&#x2010;100%) compared to extensive studies (n&#x003E;1000, 8/20, 40%; sensitivity 62.5%&#x2010;98%, specificity 28.1%&#x2010;97.6%), consistent with preferential publication of favorable results from smaller studies.</p><p>Selective outcome reporting was moderate. Core sensitivity and specificity metrics were well reported (18/20, 90%, and 17/20, 85% of studies, respectively), but secondary metrics showed substantial gaps: AUC (10/20, 50%), PPV (10/20, 50%), NPV (10/20, 50%), sensitivity 95% CI (13/20, 65%), and specificity 95% CI (12/20, 60%). A total of 1 study reported no diagnostic performance metrics. 
However, the narrative synthesis approach and substantial heterogeneity limit the impact of these biases on review conclusions compared to meta-analytic approaches.</p></sec><sec id="s3-16"><title>Certainty of Evidence</title><p>GRADE assessment revealed very low certainty in diagnostic accuracy estimates (<xref ref-type="table" rid="table9">Table 9</xref>). Starting from a high certainty level per GRADE guidance, the evidence was downgraded by a total of 6 levels across all domains. The RoB was rated as a very serious concern (2-level downgrade), given that 70% (14/20) of studies were at high or very high risk, with verification bias (14/20, 70%), selection bias (10/20, 50%), and post hoc threshold optimization (6/20, 30%) being the predominant concerns. Indirectness was rated as a serious concern (1-level downgrade) due to heterogeneity across 12 conditions, 6 modalities, and 18 AI systems. Inconsistency was rated as a serious concern, given the wide performance variation (sensitivity: 62.5%&#x2010;100%, specificity: 28.1%&#x2010;100%); even tuberculosis studies showed specificity ranging from 28.1% to 96.9%. Imprecision was rated a serious concern because 30% (6/20) of studies lacked CIs, and 25% (5/20) were small (n&#x003C;200). 
Publication bias was rated as a serious concern due to the predominance of commercial systems (12/20, 60%) and small-study effects.</p><table-wrap id="t9" position="float"><label>Table 9.</label><caption><p>GRADE<sup><xref ref-type="table-fn" rid="table9fn1">a</xref></sup> certainty of evidence assessment.</p></caption><table id="table9" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Domain</td><td align="left" valign="bottom">Concern level</td><td align="left" valign="bottom">Rationale</td><td align="left" valign="bottom">Downgrade</td></tr></thead><tbody><tr><td align="left" valign="top">Risk of bias</td><td align="left" valign="top">Very serious</td><td align="left" valign="top">70% (14/20) of studies at high or very high risk of bias, predominantly due to verification bias (14/20, 70%), selection bias (10/20, 50%), and post hoc threshold optimization (6/20, 30%). Reference standard (11/20, 55%, high risk) and flow or timing (12/20, 60%, high risk) were the weakest domains.</td><td align="left" valign="top">&#x2212;2</td></tr><tr><td align="left" valign="top">Indirectness</td><td align="left" valign="top">Serious</td><td align="left" valign="top">Extreme heterogeneity across 12 conditions, 6 imaging modalities, and 18 AI systems limits applicability to specific clinical scenarios despite appropriate point-of-care contexts.</td><td align="left" valign="top">&#x2212;1</td></tr><tr><td align="left" valign="top">Inconsistency</td><td align="left" valign="top">Serious</td><td align="left" valign="top">Wide performance variation (sensitivity 62.5%&#x2010;100%, specificity 28.1%&#x2010;100%); even within tuberculosis studies, specificity ranged 28.1%&#x2010;96.9%.</td><td align="left" valign="top">&#x2212;1</td></tr><tr><td align="left" valign="top">Imprecision</td><td align="left" valign="top">Serious</td><td align="left" valign="top">30% of studies lack CIs, 15% report very wide CIs (&#x003E;20 points), 25% are small studies 
(n&#x003C;200).</td><td align="left" valign="top">&#x2212;1</td></tr><tr><td align="left" valign="top">Publication bias</td><td align="left" valign="top">Serious</td><td align="left" valign="top">Commercial system predominance (12/20, 60%), gray literature not searched, small-study effects evident.</td><td align="left" valign="top">&#x2212;1</td></tr><tr><td align="left" valign="top">Overall certainty</td><td align="left" valign="top">Very low (&#x2295;&#x25EF;&#x25EF;&#x25EF;)</td><td align="left" valign="top">Total downgrade of 6 levels from a high certainty starting point. True diagnostic performance may be substantially different from reported estimates.</td><td align="left" valign="top">&#x2212;6</td></tr></tbody></table><table-wrap-foot><fn id="table9fn1"><p><sup>a</sup>GRADE: Grading of Recommendations, Assessment, Development, and Evaluation.</p></fn></table-wrap-foot></table-wrap><p>The very low certainty rating suggests that the true diagnostic performance may differ substantially from the reported estimates. While median sensitivity (93.6%, IQR 87%-98%) and specificity (90.6%, IQR 74.5%-96.7%) appear promising, decision-makers should interpret these values cautiously, recognizing that real-world performance is likely to differ from the published results. Local validation remains essential before clinical adoption, and current evidence supports pilot implementation rather than widespread deployment without addressing methodological and heterogeneity concerns.</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>This systematic review of 20 studies evaluating AI in point-of-care imaging for clinical decision support reveals a rapidly evolving field, with 85% (17/20) of studies published since 2023 and approximately 78,000 patients enrolled across 15 countries. 
A total of 13/20 (65%) studies explicitly evaluated task-shifting, consistently showing that nonspecialists with AI assistance achieved diagnostic performance comparable to specialists.
The median training duration was only 1 hour (range 30 minutes to 6 months), with several studies achieving excellent performance after 30 minutes. Critically, no dose-response relationship was observed between training duration and diagnostic performance. This finding challenges traditional assumptions about extensive training requirements for medical imaging interpretation. The AI effectively serves as an &#x201C;expert in the pocket,&#x201D; enabling rapid deployment of diagnostic capabilities. The implications for scalability are profound, particularly in LMIC and LIC contexts, where traditional imaging training programs requiring months or years may be unnecessary when AI provides real-time guidance.</p><p>A critical finding concerns the gap between regulatory expectations for AI transparency and actual implementation of explainability. The XAI cascade revealed a progressive drop-off from 75% (15/20) nonmentioning (level 0) to only 10% (2/20) user presentation (level 2), with no studies reaching levels 3&#x2010;5&#x2014;a field-wide gap requiring urgent attention. The findings suggest XAI is being treated as a development tool rather than a clinical communication mechanism. Regulatory compliance with the European Union AI Act and FDA guidance, as well as establishing clinical trust and appropriate AI usage, both depend on addressing this gap.</p><p>These findings have direct implications for physician autonomy&#x2014;the capacity of clinicians to exercise independent judgment over diagnosis and treatment. The near absence of clinician-facing explainability (only 10% of studies) limits physicians&#x2019; ability to evaluate AI recommendations critically. 
When AI functions as an opaque system, physicians cannot meaningfully integrate AI outputs with their clinical reasoning;
Performance in LMIC and LIC settings was slightly lower (median sensitivity 88% versus 95.2% in HICs), but the absence of cross-context validation prevents conclusions about true generalizability. Before scaling AI systems to underserved populations, multicountry validation is essential to address differences in disease prevalence, imaging equipment, image quality, and patient populations.</p><p>These promising findings must be interpreted in light of the substantial methodological concerns detailed in the &#x201C;Limitations of the Included Evidence&#x201D; section below. With 70% (14/20) of studies at high or very high RoB, real-world performance may be lower than reported, and the GRADE assessment of very low certainty reflects appropriate caution. These findings must be contextualized within the existing literature to assess their novelty and significance.</p></sec><sec id="s4-2"><title>Comparison With Prior Work</title><p>This review addresses gaps not covered by existing systematic reviews in this domain. Kossoff et al [<xref ref-type="bibr" rid="ref1">1</xref>] examined automated lung ultrasound analysis for pneumothorax detection; yet, their review was limited to a single modality and condition, without requiring point-of-care validation. Cold et al [<xref ref-type="bibr" rid="ref2">2</xref>] focused on AI in bronchoscopy within specialist procedural settings rather than point-of-care contexts. Rambabu et al [<xref ref-type="bibr" rid="ref3">3</xref>] evaluated AI for papilloedema detection using fundus photography; however, the included studies were predominantly retrospective and lacked clinical outcome assessment. Sunny et al [<xref ref-type="bibr" rid="ref4">4</xref>] reviewed biomarker-integrated cytopathology for oral lesion detection, a laboratory-based methodology fundamentally distinct from point-of-care imaging.</p><p>Our review extends beyond these prior works in several critical dimensions. 
Second, we developed and applied two frameworks&#x2014;the XAI implementation cascade and clinical impact pyramid&#x2014;that enable systematic identification of evidence gaps not previously characterized.
Health care organizations implementing AI-assisted imaging should document clinical actions and patient outcomes; real-world performance data will build the evidence base currently lacking and enable quality improvement.</p><p>Based on the identified evidence gaps, we make the following explicit recommendations for future research. XAI evaluation studies should assess not only implementation but also clinician understanding and the impact on decision-making, using mixed methods that include cognitive task analysis and decision quality metrics. Implementation research with patient outcomes as primary end points, using cluster-randomized trials to compare AI-assisted vs standard care pathways, is crucial for determining whether these systems enhance patient health. Multicountry and multicontext validation is needed before claims of global applicability can be substantiated, including sites across HIC, upper-middle-income country, LMIC, and LIC settings. Methodological improvements, including consecutive enrollment, complete verification, prespecified thresholds, and reporting per STARD-AI (Standards for Reporting of Diagnostic Accuracy Studies&#x2013;Artificial Intelligence) guidelines, would substantially strengthen the evidence base. Additional priorities include cost-effectiveness analyses using standardized methods and health system perspectives, longitudinal monitoring frameworks for deployed systems that track performance drift and recalibration needs, and equity-focused analyses that explicitly address whether AI reduces or exacerbates health disparities.</p><p>Policy implications are equally substantial, with 5 key recommendations. First, regulatory frameworks should evolve beyond requiring XAI implementation to requiring evidence of XAI evaluation and effectiveness; approval pathways should assess whether explanations are understood and valuable rather than merely present. 
Second, funding agencies should shift priorities from additional diagnostic accuracy studies toward implementation research with patient outcome end points for conditions where technical performance is established. Third, health technology assessment bodies should require cross-context validation evidence before recommending the adoption of systems in settings different from those in which they were initially validated. Fourth, workforce policy should anticipate task-shifting implications, as evidence suggests nonspecialists can achieve specialist-level performance with minimal training, with implications for training curricula, scope of practice regulations, and workforce planning. Fifth, global health initiatives should invest in validation infrastructure for LMIC and LIC settings, where the current evidence base is insufficient to support confident deployment despite these settings having the greatest need.</p></sec><sec id="s4-4"><title>Strengths</title><p>This review has several methodological strengths. A comprehensive search across 4 databases (PubMed, Scopus, IEEE Xplore, and Web of Science) with rigorous dual-reviewer screening addressed the challenges inherent in identifying genuine point-of-care implementations amid aspirational claims. The strict requirement for actual point-of-care deployment or validation, rather than theoretical potential, ensures the included evidence reflects real-world implementation. Thorough QUADAS-2 quality assessment with prespecified synthesis rules, sensitivity analysis restricted to studies with low-to-moderate RoB (6/20), and GRADE certainty evaluation ensured transparent and reproducible evidence appraisal.</p></sec><sec id="s4-5"><title>Limitations of the Included Evidence</title><p>The evidence base has substantial limitations that temper interpretation of the generally favorable diagnostic accuracy findings. 
First, 70% (14/20) of studies carried a high or very high RoB, predominantly due to verification bias (14/20, 70%), where only screen-positive cases received reference standard confirmation, and selection bias (10/20, 50%), which involved convenience sampling rather than consecutive sampling. These methodological weaknesses likely inflate reported sensitivity and specificity estimates. Second, post hoc threshold optimization in 30% (6/20) of studies may have overfitted performance to specific datasets, limiting generalizability. Third, substantial clinical heterogeneity across 12 conditions, 6 imaging modalities, and 18 distinct AI systems limits the applicability of aggregate findings to any specific clinical scenario. Fourth, incomplete outcome reporting&#x2014;with 30% (6/20) of studies lacking CIs and secondary metrics (AUC, PPV, and NPV) reported in only 50% (10/20) of the studies&#x2014;impairs the precision of effect estimates. Fifth, the predominance of commercial systems (12/20, 60%) raises concerns about the selective publication of favorable results. Collectively, these evidence limitations suggest that actual diagnostic performance in routine clinical practice may be lower than reported estimates, and the GRADE assessment of very low certainty (&#x2295;&#x25EF;&#x25EF;&#x25EF;) reflects this uncertainty.</p></sec><sec id="s4-6"><title>Limitations of the Review Process</title><p>Several limitations of this review warrant consideration. First, restricting our search to English-language publications may have excluded relevant studies from non-English contexts; given that 35% (7/20) of included studies originated from LMIC/LIC settings where non-English publication is common, this restriction may have introduced selection bias that underrepresents the global evidence base. 
Second, excluding gray literature&#x2014;including conference proceedings, preprints, and technical reports&#x2014;may have introduced publication bias by omitting negative or null findings, though this decision prioritized peer-reviewed evidence quality for clinical decision-making. Third, encountering substantial heterogeneity across conditions, modalities, and AI systems precluded meta-analysis, limiting our ability to generate pooled effect estimates that would enable more precise conclusions. Fourth, concluding our search in November 2025 means that subsequently published studies are not captured, potentially missing recent methodological advances given the rapid evolution of this field.</p></sec><sec id="s4-7"><title>Conclusions</title><p>AI-assisted point-of-care imaging demonstrates promising diagnostic accuracy (median sensitivity 93.6%, IQR 87%-98%; median specificity 90.6%, IQR 74.5%-96.7%) and enables meaningful task shifting to nonspecialist operators with minimal training. These findings support cautious optimism regarding AI&#x2019;s potential to democratize diagnostic capabilities in specialist-scarce settings.</p><p>However, 4 critical evidence gaps preclude confident recommendations for widespread adoption. The explainability gap is profound, with 75% (15/20) of studies not mentioning XAI and none evaluating whether explanations influenced clinical decisions, creating misalignment with regulatory requirements. The patient outcome gap is equally concerning, as no studies measured whether AI-assisted imaging actually improves patient health. 
The cross-context validation gap prevents conclusions about global applicability, and methodological quality concerns&#x2014;with 70% (14/20) of studies at high or very high RoB&#x2014;suggest reported performance may be optimistic.</p><p>The field requires urgent reorientation toward implementation research with patient outcome end points, rigorous XAI evaluation demonstrating clinical utility, and multicontext validation. Regulatory frameworks should require evidence of explainability effectiveness, not merely its presence. Only by systematically addressing these gaps can the field transition from promising technology demonstration to evidence-based clinical adoption that demonstrably improves patient outcomes.</p></sec></sec></body><back><ack><p/><p>Generative artificial intelligence (AI) usage: the authors used Claude (Anthropic PBC) to assist with paper editing and figure creation. All AI-generated content was reviewed, verified, and revised by the authors, who take full responsibility for the accuracy and integrity of the final content.</p></ack><notes><sec><title>Funding</title><p>This research received no specific grant from any funding agency in the public, commercial, or not-for-profit sectors.</p></sec><sec><title>Data Availability</title><p>The following materials are publicly available:</p><p>1. Complete search strategies for all databases (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>)</p><p>2. Blank data extraction form template (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>)</p><p>3. Complete extracted data for all 20 included studies (<xref ref-type="supplementary-material" rid="app7">Multimedia Appendix 7</xref>)</p><p>4. Detailed QUADAS-2 quality assessments (<xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>)</p><p>5. 
Studies excluded at full-text review with reasons (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>)</p><p>6. Sensitivity analysis data and results (<xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>)</p><p>7. Reporting bias assessment details (<xref ref-type="supplementary-material" rid="app6">Multimedia Appendix 6</xref>)</p><p>8. GRADE certainty of evidence assessment (<xref ref-type="supplementary-material" rid="app8">Multimedia Appendix 8</xref>)</p><p>9. PRISMA 2020 checklist (<xref ref-type="supplementary-material" rid="app9">Checklist 1</xref>)</p><p>No custom analysis code was used as the synthesis was narrative. The complete reference list of screened studies is available from the corresponding author upon reasonable request.</p></sec></notes><fn-group><fn fn-type="con"><p>Conceptualization: PW (lead), BZ (supporting), KE (supporting), AA (supporting), AGA (supporting)</p><p>Data curation: PW (lead), BZ (supporting)</p><p>Formal analysis: BZ (lead), PW (supporting)</p><p>Investigation: PW (lead), BZ (supporting)</p><p>Methodology: BZ (lead), PW (supporting)</p><p>Project administration: PW (lead), KE (supporting)</p><p>Resources: KE (lead)</p><p>Software: PW (lead), BZ (supporting)</p><p>Supervision: KE (lead)</p><p>Validation: BZ (lead), PW (supporting)</p><p>Visualization: PW (lead), BZ (supporting)</p><p>Writing&#x2013;original draft: PW (equal), BZ (equal)</p><p>Writing&#x2013;review &#x0026; editing: PW (equal), BZ (equal), KE (supporting), AA (supporting), AGA (supporting)</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">AUC</term><def><p>area under the receiver operating characteristic curve</p></def></def-item><def-item><term id="abb3">CDSS</term><def><p>clinical decision support 
<def-item><term id="abb5">DVT</term><def><p>deep vein thrombosis</p></def></def-item>
Studies&#x2013;Artificial Intelligence</p></def></def-item><def-item><term id="abb24">XAI</term><def><p>explainable artificial intelligence</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kossoff</surname><given-names>J</given-names> </name><name name-style="western"><surname>Duncan</surname><given-names>S</given-names> </name><name name-style="western"><surname>Acharya</surname><given-names>J</given-names> </name><name name-style="western"><surname>Davis</surname><given-names>D</given-names> </name></person-group><article-title>Automated analysis of ultrasound for the diagnosis of pneumothorax: a systematic review</article-title><source>Cureus</source><year>2024</year><month>11</month><volume>16</volume><issue>11</issue><fpage>e72896</fpage><pub-id pub-id-type="doi">10.7759/cureus.72896</pub-id><pub-id pub-id-type="medline">39618742</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cold</surname><given-names>KM</given-names> </name><name name-style="western"><surname>Vamadevan</surname><given-names>A</given-names> </name><name name-style="western"><surname>Laursen</surname><given-names>CB</given-names> </name><name name-style="western"><surname>Bjerrum</surname><given-names>F</given-names> </name><name name-style="western"><surname>Singh</surname><given-names>S</given-names> </name><name name-style="western"><surname>Konge</surname><given-names>L</given-names> </name></person-group><article-title>Artificial intelligence in bronchoscopy: a systematic review</article-title><source>Eur Respir Rev</source><year>2025</year><month>04</month><volume>34</volume><issue>176</issue><fpage>240274</fpage><pub-id pub-id-type="doi">10.1183/16000617.0274-2024</pub-id><pub-id 
pub-id-type="medline">40436614</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rambabu</surname><given-names>L</given-names> </name><name name-style="western"><surname>Edmiston</surname><given-names>T</given-names> </name><name name-style="western"><surname>Smith</surname><given-names>BG</given-names> </name><etal/></person-group><article-title>Detecting papilloedema as a marker of raised intracranial pressure using artificial intelligence: a systematic review</article-title><source>PLOS Digit Health</source><year>2025</year><month>09</month><volume>4</volume><issue>9</issue><fpage>e0000783</fpage><pub-id pub-id-type="doi">10.1371/journal.pdig.0000783</pub-id><pub-id pub-id-type="medline">40892792</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sunny</surname><given-names>SP</given-names> </name><name name-style="western"><surname>D R</surname><given-names>R</given-names> </name><name name-style="western"><surname>Hariharan</surname><given-names>A</given-names> </name><etal/></person-group><article-title>CD44-SNA1 integrated cytopathology for delineation of high grade dysplastic and neoplastic oral lesions</article-title><source>PLOS ONE</source><year>2023</year><volume>18</volume><issue>9</issue><fpage>e0291972</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0291972</pub-id><pub-id pub-id-type="medline">37747904</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="web"><article-title>Regulation (EU) 2024/1689 of the European Parliament and of the Council of 13 june 2024</article-title><source>European Union</source><year>2024</year><access-date>2026-03-06</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://eur-lex.europa.eu/eli/reg/2024/1689/oj">https://eur-lex.europa.eu/eli/reg/2024/1689/oj</ext-link></comment></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="web"><article-title>Artificial intelligence/machine learning (AI/ML)-based software as a medical device (samd) action plan</article-title><source>US Food and Drug Administration</source><year>2021</year><access-date>2026-03-06</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.fda.gov/media/145022/download">https://www.fda.gov/media/145022/download</ext-link></comment></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="web"><article-title>Transparency for machine learning-enabled medical devices: guiding principles</article-title><source>US Food and Drug Administration, Health Canada, Medicines and Healthcare products Regulatory Agency</source><year>2024</year><month>06</month><day>13</day><access-date>2026-03-06</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.fda.gov/medical-devices/software-medical-device-samd/transparency-machine-learning-enabled-medical-devices-guiding-principles">https://www.fda.gov/medical-devices/software-medical-device-samd/transparency-machine-learning-enabled-medical-devices-guiding-principles</ext-link></comment></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="web"><article-title>21st century cures act, public law 114-255, &#x00A7;3060, 130 stat 1033</article-title><source>US Congress</source><year>2016</year><month>12</month><day>13</day><access-date>2026-03-06</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.congress.gov/114/plaws/publ255/PLAW-114publ255.pdf">https://www.congress.gov/114/plaws/publ255/PLAW-114publ255.pdf</ext-link></comment></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="web"><article-title>Clinical decision support software: final guidance for 
industry and food and drug administration staff</article-title><source>US Food and Drug Administration</source><year>2026</year><month>01</month><day>29</day><access-date>2026-03-06</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.fda.gov/media/109618/download">https://www.fda.gov/media/109618/download</ext-link></comment></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Avgerinos</surname><given-names>E</given-names> </name><name name-style="western"><surname>Spiliopoulos</surname><given-names>S</given-names> </name><name name-style="western"><surname>Psachoulia</surname><given-names>F</given-names> </name><etal/></person-group><article-title>Novel artificial intelligence guided non-expert compression ultrasound deep vein thrombosis diagnostic pathway may reduce vascular laboratory venous testing</article-title><source>Eur J Vasc Endovasc Surg</source><year>2025</year><month>10</month><volume>70</volume><issue>4</issue><fpage>517</fpage><lpage>522</lpage><pub-id pub-id-type="doi">10.1016/j.ejvs.2025.04.070</pub-id><pub-id pub-id-type="medline">40378910</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Berg</surname><given-names>WA</given-names> </name><name name-style="western"><surname>L&#x00F3;pez Aldrete</surname><given-names>AL</given-names> </name><name name-style="western"><surname>Jairaj</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Toward AI-supported US triage of women with palpable breast lumps in a low-resource setting</article-title><source>Radiology</source><year>2023</year><month>05</month><volume>307</volume><issue>4</issue><fpage>e223351</fpage><pub-id pub-id-type="doi">10.1148/radiol.223351</pub-id><pub-id 
pub-id-type="medline">37129492</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cao</surname><given-names>X</given-names> </name><name name-style="western"><surname>Feng</surname><given-names>B</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>B</given-names> </name><etal/></person-group><article-title>Performance of computer-aided detection software in tuberculosis case finding in township health centers in China</article-title><source>Chronic Dis Transl Med</source><year>2025</year><month>06</month><volume>11</volume><issue>2</issue><fpage>140</fpage><lpage>147</lpage><pub-id pub-id-type="doi">10.1002/cdt3.70001</pub-id><pub-id pub-id-type="medline">40486951</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>J</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>X</given-names> </name><name name-style="western"><surname>Li</surname><given-names>M</given-names> </name><etal/></person-group><article-title>EE-explorer: a multimodal artificial intelligence system for eye emergency triage and primary diagnosis</article-title><source>Am J Ophthalmol</source><year>2023</year><month>08</month><volume>252</volume><fpage>253</fpage><lpage>264</lpage><pub-id pub-id-type="doi">10.1016/j.ajo.2023.04.007</pub-id><pub-id pub-id-type="medline">37142171</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fergus</surname><given-names>P</given-names> </name><name name-style="western"><surname>Chalmers</surname><given-names>C</given-names> </name><name 
name-style="western"><surname>Henderson</surname><given-names>W</given-names> </name><name name-style="western"><surname>Roberts</surname><given-names>D</given-names> </name><name name-style="western"><surname>Waraich</surname><given-names>A</given-names> </name></person-group><article-title>Pressure ulcer categorization and reporting in domiciliary settings using deep learning and mobile devices: a clinical trial to evaluate end-to-end performance</article-title><source>IEEE Access</source><year>2023</year><volume>11</volume><fpage>65138</fpage><lpage>65152</lpage><pub-id pub-id-type="doi">10.1109/ACCESS.2023.3289839</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Heydon</surname><given-names>P</given-names> </name><name name-style="western"><surname>Egan</surname><given-names>C</given-names> </name><name name-style="western"><surname>Bolter</surname><given-names>L</given-names> </name><etal/></person-group><article-title>Prospective evaluation of an artificial intelligence-enabled algorithm for automated diabetic retinopathy screening of 30 000 patients</article-title><source>Br J Ophthalmol</source><year>2021</year><month>05</month><volume>105</volume><issue>5</issue><fpage>723</fpage><lpage>728</lpage><pub-id pub-id-type="doi">10.1136/bjophthalmol-2020-316594</pub-id><pub-id pub-id-type="medline">32606081</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Iacob</surname><given-names>MS</given-names> </name><name name-style="western"><surname>Kundnani</surname><given-names>NR</given-names> </name><name name-style="western"><surname>Sharma</surname><given-names>A</given-names> </name><name name-style="western"><surname>Iacob</surname><given-names>A</given-names> </name><name 
name-style="western"><surname>Dinu</surname><given-names>AR</given-names> </name><name name-style="western"><surname>Dragan</surname><given-names>SR</given-names> </name></person-group><article-title>Diagnostic accuracy of AI-assisted focused cardiac ultrasound (FOCUS) in primary care</article-title><source>Healthcare (Basel)</source><year>2025</year><month>10</month><day>29</day><volume>13</volume><issue>21</issue><fpage>2726</fpage><pub-id pub-id-type="doi">10.3390/healthcare13212726</pub-id><pub-id pub-id-type="medline">41228100</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jaremko</surname><given-names>JL</given-names> </name><name name-style="western"><surname>Hareendranathan</surname><given-names>A</given-names> </name><name name-style="western"><surname>Bolouri</surname><given-names>SES</given-names> </name><name name-style="western"><surname>Frey</surname><given-names>RF</given-names> </name><name name-style="western"><surname>Dulai</surname><given-names>S</given-names> </name><name name-style="western"><surname>Bailey</surname><given-names>AL</given-names> </name></person-group><article-title>AI aided workflow for hip dysplasia screening using ultrasound in primary care clinics</article-title><source>Sci Rep</source><year>2023</year><month>06</month><day>7</day><volume>13</volume><issue>1</issue><fpage>9224</fpage><pub-id pub-id-type="doi">10.1038/s41598-023-35603-9</pub-id><pub-id pub-id-type="medline">37286559</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jayaraman</surname><given-names>P</given-names> </name><name name-style="western"><surname>S</surname><given-names>S</given-names> </name><name name-style="western"><surname>Paul</surname><given-names>S</given-names> 
</name><etal/></person-group><article-title>Artificial intelligence as a proficient tool in detecting pulmonary tuberculosis in massive population screening programs: a case study in Chennai, India</article-title><source>J Rural Med</source><year>2025</year><volume>20</volume><issue>1</issue><fpage>13</fpage><lpage>19</lpage><pub-id pub-id-type="doi">10.2185/jrm.2024-015</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kazemzadeh</surname><given-names>S</given-names> </name><name name-style="western"><surname>Kiraly</surname><given-names>AP</given-names> </name><name name-style="western"><surname>Nabulsi</surname><given-names>Z</given-names> </name><etal/></person-group><article-title>Prospective multi-site validation of AI to detect tuberculosis and chest x-ray abnormalities</article-title><source>NEJM AI</source><year>2024</year><month>10</month><volume>1</volume><issue>10</issue><fpage>AIoa2400018</fpage><pub-id pub-id-type="doi">10.1056/aioa2400018</pub-id><pub-id pub-id-type="medline">39823016</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Love</surname><given-names>SM</given-names> </name><name name-style="western"><surname>Berg</surname><given-names>WA</given-names> </name><name name-style="western"><surname>Podilchuk</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Palpable breast lump triage by minimally trained operators in Mexico using computer-assisted diagnosis and low-cost ultrasound</article-title><source>J Glob Oncol</source><year>2018</year><month>08</month><volume>4</volume><issue>4</issue><fpage>1</fpage><lpage>9</lpage><pub-id pub-id-type="doi">10.1200/JGO.17.00222</pub-id><pub-id pub-id-type="medline">30156946</pub-id></nlm-citation></ref><ref 
id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Malherbe</surname><given-names>K</given-names> </name></person-group><article-title>Revolutionizing breast cancer screening: integrating artificial intelligence with clinical examination for targeted care in South Africa</article-title><source>J Radiol Nurs</source><year>2025</year><month>06</month><volume>44</volume><issue>2</issue><fpage>195</fpage><lpage>202</lpage><pub-id pub-id-type="doi">10.1016/j.jradnu.2024.12.004</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Marquez</surname><given-names>N</given-names> </name><name name-style="western"><surname>Carpio</surname><given-names>EJ</given-names> </name><name name-style="western"><surname>Santiago</surname><given-names>MR</given-names> </name><etal/></person-group><article-title>Performance of chest x-ray with computer-aided detection powered by deep learning-based artificial intelligence for tuberculosis presumptive identification during case finding in the Philippines</article-title><source>BMC Glob Public Health</source><year>2025</year><month>08</month><day>22</day><volume>3</volume><issue>1</issue><fpage>74</fpage><pub-id pub-id-type="doi">10.1186/s44263-025-00198-y</pub-id><pub-id pub-id-type="medline">40847378</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nath</surname><given-names>A</given-names> </name><name name-style="western"><surname>Hashim</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Shukla</surname><given-names>S</given-names> </name><etal/></person-group><article-title>A multicentre study to evaluate the diagnostic performance of a novel CAD software, DecXpert, 
for radiological diagnosis of tuberculosis in the northern Indian population</article-title><source>Sci Rep</source><year>2024</year><month>09</month><day>5</day><volume>14</volume><issue>1</issue><fpage>20711</fpage><pub-id pub-id-type="doi">10.1038/s41598-024-71346-x</pub-id><pub-id pub-id-type="medline">39237689</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nothnagel</surname><given-names>K</given-names> </name><name name-style="western"><surname>Aslam</surname><given-names>MF</given-names> </name></person-group><article-title>Evaluating the benefits of machine learning for diagnosing deep vein thrombosis compared with gold standard ultrasound: a feasibility study</article-title><source>BJGP Open</source><year>2024</year><month>12</month><volume>8</volume><issue>4</issue><fpage>BJGPO.2024.0057</fpage><pub-id pub-id-type="doi">10.3399/BJGPO.2024.0057</pub-id><pub-id pub-id-type="medline">38866404</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Papachristou</surname><given-names>P</given-names> </name><name name-style="western"><surname>S&#x00F6;derholm</surname><given-names>M</given-names> </name><name name-style="western"><surname>Pallon</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Evaluation of an artificial intelligence-based decision support for the detection of cutaneous melanoma in primary care: a prospective real-life clinical trial</article-title><source>Br J Dermatol</source><year>2024</year><month>06</month><day>20</day><volume>191</volume><issue>1</issue><fpage>125</fpage><lpage>133</lpage><pub-id pub-id-type="doi">10.1093/bjd/ljae021</pub-id><pub-id pub-id-type="medline">38234043</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Poli</surname><given-names>UR</given-names> </name><name name-style="western"><surname>Gudlavalleti</surname><given-names>AG</given-names> </name><name name-style="western"><surname>Bharadwaj Y</surname><given-names>J</given-names> </name><name name-style="western"><surname>Pant</surname><given-names>HB</given-names> </name><name name-style="western"><surname>Agiwal</surname><given-names>V</given-names> </name><name name-style="western"><surname>Murthy</surname><given-names>GVS</given-names> </name></person-group><article-title>Development and clinical validation of visual inspection with acetic acid application-artificial intelligence tool using cervical images in screen-and-treat visual screening for cervical cancer in South India: a pilot study</article-title><source>JCO Glob Oncol</source><year>2024</year><month>12</month><volume>10</volume><issue>10</issue><fpage>e2400146</fpage><pub-id pub-id-type="doi">10.1200/GO.24.00146</pub-id><pub-id pub-id-type="medline">39666915</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yang</surname><given-names>A</given-names> </name><name name-style="western"><surname>Bakhtari</surname><given-names>N</given-names> </name><name name-style="western"><surname>Langdon-Embry</surname><given-names>L</given-names> </name><etal/></person-group><article-title>Kankanet: an artificial neural network-based object detection smartphone application and mobile microscope as a point-of-care diagnostic aid for soil-transmitted helminthiases</article-title><source>PLOS Negl Trop Dis</source><year>2019</year><month>08</month><volume>13</volume><issue>8</issue><fpage>e0007577</fpage><pub-id pub-id-type="doi">10.1371/journal.pntd.0007577</pub-id><pub-id pub-id-type="medline">31381573</pub-id></nlm-citation></ref><ref 
id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yu</surname><given-names>H</given-names> </name><name name-style="western"><surname>Mohammed</surname><given-names>FO</given-names> </name><name name-style="western"><surname>Hamid</surname><given-names>MA</given-names> </name><etal/></person-group><article-title>Patient-level performance evaluation of a smartphone-based malaria diagnostic application</article-title><source>Malar J</source><year>2023</year><month>01</month><day>27</day><volume>22</volume><issue>1</issue><fpage>33</fpage><pub-id pub-id-type="doi">10.1186/s12936-023-04446-0</pub-id><pub-id pub-id-type="medline">36707822</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhu</surname><given-names>A</given-names> </name><name name-style="western"><surname>Tailor</surname><given-names>P</given-names> </name><name name-style="western"><surname>Verma</surname><given-names>R</given-names> </name><etal/></person-group><article-title>Implementation of deep learning artificial intelligence in vision-threatening disease screenings for an underserved community during COVID-19</article-title><source>J Telemed Telecare</source><year>2024</year><month>12</month><volume>30</volume><issue>10</issue><fpage>1590</fpage><lpage>1597</lpage><pub-id pub-id-type="doi">10.1177/1357633X231158832</pub-id><pub-id pub-id-type="medline">36908254</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Grote</surname><given-names>T</given-names> </name><name name-style="western"><surname>Berens</surname><given-names>P</given-names> </name></person-group><article-title>On the ethics of algorithmic decision-making in 
healthcare</article-title><source>J Med Ethics</source><year>2020</year><month>03</month><volume>46</volume><issue>3</issue><fpage>205</fpage><lpage>211</lpage><pub-id pub-id-type="doi">10.1136/medethics-2019-105586</pub-id><pub-id pub-id-type="medline">31748206</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Complete search strategies for all databases.</p><media xlink:href="ai_v5i1e80928_app1.docx" xlink:title="DOCX File, 17 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Studies excluded at full-text review with reasons.</p><media xlink:href="ai_v5i1e80928_app2.docx" xlink:title="DOCX File, 20 KB"/></supplementary-material><supplementary-material id="app3"><label>Multimedia Appendix 3</label><p>Blank data extraction form template.</p><media xlink:href="ai_v5i1e80928_app3.docx" xlink:title="DOCX File, 31 KB"/></supplementary-material><supplementary-material id="app4"><label>Multimedia Appendix 4</label><p>Detailed QUADAS-2 quality assessments. 
QUADAS-2: Quality Assessment of Diagnostic Accuracy Studies 2.</p><media xlink:href="ai_v5i1e80928_app4.docx" xlink:title="DOCX File, 262 KB"/></supplementary-material><supplementary-material id="app5"><label>Multimedia Appendix 5</label><p>Sensitivity analysis data and results.</p><media xlink:href="ai_v5i1e80928_app5.docx" xlink:title="DOCX File, 21 KB"/></supplementary-material><supplementary-material id="app6"><label>Multimedia Appendix 6</label><p>Reporting bias assessment details.</p><media xlink:href="ai_v5i1e80928_app6.docx" xlink:title="DOCX File, 24 KB"/></supplementary-material><supplementary-material id="app7"><label>Multimedia Appendix 7</label><p>Complete extracted data for all 20 included studies.</p><media xlink:href="ai_v5i1e80928_app7.docx" xlink:title="DOCX File, 82 KB"/></supplementary-material><supplementary-material id="app8"><label>Multimedia Appendix 8</label><p>GRADE certainty of evidence assessment. GRADE: Grading of Recommendations, Assessment, Development, and Evaluation.</p><media xlink:href="ai_v5i1e80928_app8.docx" xlink:title="DOCX File, 22 KB"/></supplementary-material><supplementary-material id="app9"><label>Checklist 1</label><p>PRISMA 2020 checklist.</p><media xlink:href="ai_v5i1e80928_app9.docx" xlink:title="DOCX File, 276 KB"/></supplementary-material></app-group></back></article>