<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR AI</journal-id><journal-id journal-id-type="publisher-id">ai</journal-id><journal-id journal-id-type="index">41</journal-id><journal-title>JMIR AI</journal-title><abbrev-journal-title>JMIR AI</abbrev-journal-title><issn pub-type="epub">2817-1705</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v5i1e81049</article-id><article-id pub-id-type="doi">10.2196/81049</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>AI-Assisted Systematic Literature Review of the Economic Burden of Pneumococcal Disease: Development and Validation Study</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Wang</surname><given-names>Dong</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Datta</surname><given-names>Surabhi</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Glasgow</surname><given-names>Julie</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Lee</surname><given-names>Kyeryoung</given-names></name><degrees>PhD</degrees><xref ref-type="aff" 
rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Paek</surname><given-names>Hunki</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Zhang</surname><given-names>Jun</given-names></name><degrees>MSPH, MD</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Zheng</surname><given-names>Yi</given-names></name><degrees>MPH, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Huang</surname><given-names>Yi-Ling</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>He</surname><given-names>Long</given-names></name><degrees>MS</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Rastegar-Mojarad</surname><given-names>Majid</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff5">5</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Cassell</surname><given-names>Kelsie</given-names></name><degrees>MPH, PhD</degrees><xref ref-type="aff" rid="aff6">6</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Wang</surname><given-names>Xiaoyan</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff7">7</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Cossrow</surname><given-names>Nicole</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff6">6</xref></contrib></contrib-group><aff id="aff1"><institution>Biostatistics and Research Decision Sciences, Merck &#x0026; Co, Inc</institution><addr-line>126 East Lincoln 
Ave</addr-line><addr-line>Rahway</addr-line><addr-line>NJ</addr-line><country>United States</country></aff><aff id="aff2"><institution>Data Science &#x0026; Analytics, Intelligent Medical Objects</institution><addr-line>Rosemont</addr-line><addr-line>IL</addr-line><country>United States</country></aff><aff id="aff3"><institution>Clinical Informatics and Terminology Data Engineering, Intelligent Medical Objects</institution><addr-line>Rosemont</addr-line><addr-line>IL</addr-line><country>United States</country></aff><aff id="aff4"><institution>Medical Affairs, MSD R&#x0026;D (China) Co, Ltd</institution><addr-line>Beijing</addr-line><country>China</country></aff><aff id="aff5"><institution>Life Sciences, Intelligent Medical Objects</institution><addr-line>Rosemont</addr-line><addr-line>IL</addr-line><country>United States</country></aff><aff id="aff6"><institution>Outcomes Research, Merck &#x0026; Co, Inc</institution><addr-line>Rahway</addr-line><addr-line>NJ</addr-line><country>United States</country></aff><aff id="aff7"><institution>Department of Health Policy and Management, Tulane University</institution><addr-line>New Orleans</addr-line><addr-line>LA</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Malin</surname><given-names>Bradley</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Shivanna</surname><given-names>Abhishek</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Thongdaeng</surname><given-names>Hassaporn</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Liang</surname><given-names>Xiaolong</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Dong Wang, PhD, Biostatistics and Research Decision Sciences, Merck &#x0026; Co, Inc, 126 East Lincoln Ave, Rahway, NJ, 
07065, United States, 1 732 594 4000; <email>dong.wang10@merck.com</email></corresp></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>15</day><month>6</month><year>2026</year></pub-date><volume>5</volume><elocation-id>e81049</elocation-id><history><date date-type="received"><day>30</day><month>07</month><year>2025</year></date><date date-type="rev-recd"><day>21</day><month>03</month><year>2026</year></date><date date-type="accepted"><day>10</day><month>04</month><year>2026</year></date></history><copyright-statement>&#x00A9; Dong Wang, Surabhi Datta, Julie Glasgow, Kyeryoung Lee, Hunki Paek, Jun Zhang, Yi Zheng, Yi-Ling Huang, Long He, Majid Rastegar-Mojarad, Kelsie Cassell, Xiaoyan Wang, Nicole Cossrow. Originally published in JMIR AI (<ext-link ext-link-type="uri" xlink:href="https://ai.jmir.org">https://ai.jmir.org</ext-link>), 15.6.2026. </copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR AI, is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://www.ai.jmir.org/">https://www.ai.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://ai.jmir.org/2026/1/e81049"/><abstract><sec><title>Background</title><p>Automated systematic literature review (SLR) may reduce the workload and errors associated with manual review, enabling faster, up-to-date reviews even with increasing publication volumes. Large language models (LLMs) have demonstrated strong capabilities in understanding unstructured languages. However, few studies have explored the potential of a comprehensive LLM platform to streamline the entire SLR process from article screening to data extraction.</p></sec><sec><title>Objective</title><p>This study aimed to investigate the feasibility of applying an LLM-based system to assist with SLR development.</p></sec><sec sec-type="methods"><title>Methods</title><p>We developed the Intelligent Systematic Literature Review (ISLaR 2.0) platform, powered by an LLM, and applied it to a use case of the economic burden of pneumococcal disease (PD) literature. First, we established the inclusion and exclusion criteria for the SLR. Second, we defined data elements related to economic burden and domain knowledge, along with guidelines for applying these definitions. Finally, we used the criteria and data element specifications to develop LLM prompts for screening and data extraction. For data extraction, we identified relevant study characteristics and economic burden outcomes. We evaluated ISLaR 2.0&#x2019;s performance against a gold standard of 50 expert-curated PD articles, using standard metrics (accuracy, precision, recall, and <italic>F</italic><sub>1</sub>-score). 
We also conducted a qualitative analysis to describe errors made by the system.</p></sec><sec sec-type="results"><title>Results</title><p>ISLaR 2.0 performed well in abstract and full-text screening (<italic>F</italic><sub>1</sub>-scores of 86.27 for abstract screening and 87.18 for full-text screening) and data extraction from text (<italic>F</italic><sub>1</sub>-scores of 92.83 for study details and 79.76 for economic burden outcomes). The <italic>F</italic><sub>1</sub>-score for data extraction of tabular economic burden outcome data was 94.83. The qualitative analysis revealed 2 main challenges in extracting economic burden details: misclassification of cost categories and failure to extract relevant information.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>ISLaR 2.0 enabled efficient execution of an SLR regarding the economic burden of PD. The platform allowed users to flexibly define and modify criteria and data elements, supporting its use across a broad range of health research topics.</p></sec></abstract><kwd-group><kwd>pneumococcal disease</kwd><kwd>economic burden</kwd><kwd>systematic literature review</kwd><kwd>natural language processing</kwd><kwd>generative artificial intelligence</kwd><kwd>GenAI</kwd><kwd>artificial intelligence</kwd><kwd>AI</kwd><kwd>large language models</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Systematic literature reviews (SLRs) in the field of health sciences enable the synthesis of robust and reliable evidence for clinical decision-making and regulatory submissions, as well as the identification of knowledge gaps [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. 
However, manual SLRs are time consuming and labor intensive [<xref ref-type="bibr" rid="ref3">3</xref>-<xref ref-type="bibr" rid="ref5">5</xref>], with one study finding that SLR development typically requires approximately 67 weeks of skilled labor from project initiation to publication [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref6">6</xref>]. Automation can greatly enhance the efficiency of SLR development [<xref ref-type="bibr" rid="ref4">4</xref>]. To this end, artificial intelligence (AI), including large language models (LLMs), has increasingly been used to assist with SLR activities, ranging from the screening of abstracts [<xref ref-type="bibr" rid="ref7">7</xref>-<xref ref-type="bibr" rid="ref9">9</xref>] to full-text data extraction [<xref ref-type="bibr" rid="ref10">10</xref>-<xref ref-type="bibr" rid="ref12">12</xref>].</p><p>Previous studies of AI-assisted SLR development have often focused either on a single aspect of the process, such as eligible article screening, or on clinical topics, such as treatment efficacy in oncology or immune diseases [<xref ref-type="bibr" rid="ref7">7</xref>-<xref ref-type="bibr" rid="ref12">12</xref>]. However, it is important to understand whether AI-assisted SLR tools are suitable for other types of health research, including studies in epidemiology, public health, and health economics. Compared with clinical research, these topics may involve a broader range of biological, social, monetary, and infrastructural factors and thus more varied terminology and outcome measures. Economic burden studies, for example, evaluate the financial impact of a disease on individuals, health care systems, and society, potentially encompassing a wide range of clinical, epidemiological, and monetary measures. 
SLR development based on conventional AI (eg, supervised machine learning) has been evaluated for economic burden of disease research [<xref ref-type="bibr" rid="ref13">13</xref>-<xref ref-type="bibr" rid="ref15">15</xref>]. However, few studies have assessed a comprehensive SLR platform that uses an LLM, specifically GPT-4 (OpenAI), to conduct all stages of an economic burden of disease SLR.</p><p>Notably, SLRs of economic burden of disease studies address a critical need to synthesize data to evaluate the wider impact of vaccination programs, including effects on medical costs and health care resource use [<xref ref-type="bibr" rid="ref16">16</xref>-<xref ref-type="bibr" rid="ref23">23</xref>]. This data synthesis is relevant for diseases such as pneumococcal disease (PD), a condition associated with a high burden of morbidity and mortality worldwide that may be lowered by pneumococcal vaccines [<xref ref-type="bibr" rid="ref24">24</xref>-<xref ref-type="bibr" rid="ref26">26</xref>]. However, conducting SLRs for economic burden of disease and cost-effectiveness research can be challenging. Studies in this area are often lengthy, involve complex and varying methods, include large tables of input variables, and analyze multiple scenarios for a wide range of input values. Thus, data extraction and output consolidation can be time consuming and difficult to standardize across studies.</p><p>To address the need for more efficient development of SLRs in economic burden of disease studies, we leveraged the capabilities of generative AI (GenAI) to develop the Intelligent Systematic Literature Review (ISLaR 2.0) system, an LLM-based SLR platform designed to seamlessly conduct SLRs for a broad range of study types. ISLaR 2.0 automates the entire SLR process, from the screening of abstracts and full-text documents to full-text data extraction. 
This is an enhancement of our ISLaR 1.0 system [<xref ref-type="bibr" rid="ref15">15</xref>], which can only be used for abstract screening and full-text data extraction. Here, we applied ISLaR 2.0 to a review of the literature on the economic burden of PD in high-risk populations, which served as a use case to evaluate the platform&#x2019;s capabilities and performance.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Overview</title><p>ISLaR 2.0 is a comprehensive SLR platform that incorporates key steps of SLR, including article retrieval from the PubMed and Scopus databases, abstract and full-text screening, data extraction, and results summarization. Thus, ISLaR 2.0 provides LLM-assisted functionality to address the most time-consuming steps of SLR development. For each LLM-assisted step, ISLaR 2.0 uses criteria and other information provided by users through a user interface to construct LLM prompts that guide task processing. Notably, ISLaR 2.0 generates recommendations with explanations for manual human review, thereby incorporating a human-in-the-loop approach into the system. We evaluated the performance of ISLaR 2.0 by comparing the system&#x2019;s recommendations against a gold standard of 50 manually screened and extracted PD articles. Performance was assessed quantitatively by calculating standard evaluation metrics and qualitatively by manually reviewing articles to describe system errors in screening and data extraction.</p></sec><sec id="s2-2"><title>ISLaR 2.0 SLR Workflow</title><p>ISLaR 2.0 processes are driven by GPT-4 prompts constructed from information manually input into simple, semistructured user interfaces (Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). These prompts drive the development of the SLR across multiple steps (<xref ref-type="fig" rid="figure1">Figure 1</xref>). 
In step 1, the retrieval of articles from PubMed and Scopus databases is enabled by user input queries derived from topic-specific inclusion and exclusion criteria. In this evaluation, studies of the economic burden of PD were retrieved from the PubMed database using the following query: ((&#x201C;Pneumococcal disease*&#x201D;[Text Word] OR &#x201C;Streptococcus pneumoniae&#x201D;[Text Word]) AND ((cancer[Text Word]) OR (Immunocompromis*[Text Word]) OR (HIV[Text Word]) OR (&#x201C;renal disease&#x201D;[Text Word]) OR (asplenia[Text Word]) OR (diabetes[Text Word]) OR (&#x201C;heart disease&#x201D;[Text Word]) OR (&#x201C;lung disease&#x201D;[Text Word]) OR (&#x201C;respiratory disease&#x201D;[Text Word]) OR (&#x201C;sickle cell&#x201D;[Text Word]) OR (&#x201C;cochlear implant&#x201D;[Text Word]) OR &#x201C;cerebrospinal fluid leak&#x201D;[Text Word])) AND ((cost[Text Word]) OR (&#x201C;economic burden&#x201D;[Text Word]))</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Intelligent Systematic Literature Review (ISLaR 2.0) workflow.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v5i1e81049_fig01.png"/></fig><p>In step 2, the user inputs the SLR-specific population, intervention, comparison, and outcome (PICO) criteria, which provide the basis for inclusion or exclusion of articles in abstract and full-text screening. The user may specify different criteria for abstract screening than for full-text screening, for example, to enable the capture of a broader set of potentially relevant abstracts and a narrower, more accurate set of full-text articles. In this study, we used the same set of criteria at both abstract and full-text screening stages as defined in <xref ref-type="table" rid="table1">Table 1</xref>, but used different prompts as shown in Table S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. 
PICO-based criteria used in this study allowed for a wide range of study designs (eg, clinical trials, real-world evaluations, and meta-analyses), enabling assessment of data extraction for an array of economic burden measures.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Population, intervention, comparison, and outcome criteria for studies of the economic burden of pneumococcal disease (PD).</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Criteria</td><td align="left" valign="bottom">Inclusion criteria</td><td align="left" valign="bottom">Exclusion criteria</td></tr></thead><tbody><tr><td align="left" valign="top">Population</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Adults (aged &#x2265;18 years) or children (aged 0&#x2010;17 years) with PD or a disease associated with <italic>Streptococcus pneumoniae</italic></p></list-item><list-item><p>High-risk populations with PD or diseases associated with <italic>S pneumoniae</italic>, including individuals with cancer, immunocompromising conditions, HIV, renal disease, asplenia, diabetes, heart conditions, lung conditions, sickle cell disease, cochlear implants, and cerebrospinal fluid leaks</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Mixed pediatric and adult populations without segregated results</p></list-item><list-item><p>Studies that did not report on at least one of the following conditions: pneumococcal pneumonia, nonbacteremic pneumococcal pneumonia, pneumococcal or streptococcal meningitis (including postmeningitis sequelae), acute otitis media, pneumococcal bacteremia (sepsis or septicemia), streptococcal septicemia, all-cause pneumonia<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup>, community-acquired pneumonia<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup>, unspecified bacterial pneumonia<sup><xref 
ref-type="table-fn" rid="table1fn1">a</xref></sup>, complicated pneumonia<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup>, bacteremic pneumonia<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup>, all-cause otitis media (OM)<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup>, acute suppurative OM<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup>, recurrent OM<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup>, complicated OM<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup>, OM with tympanostomy tube replacement<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup>, empyema<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup>, and pleural effusion<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup></p></list-item></list></td></tr><tr><td align="left" valign="top">Interventions or comparators</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Data that are not specific to any therapy</p></list-item><list-item><p>Data specific to PD vaccination</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>&#x2014;<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup></p></list-item></list></td></tr><tr><td align="left" valign="top">Outcomes</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Study results including at least one of the following economic burden outcomes: direct costs, indirect costs, societal costs, and resource use</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Studies not reporting on at least one of the outcomes listed in the inclusion criteria</p></list-item></list></td></tr><tr><td align="left" valign="top">Study types</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Meta-analysis study</p></list-item><list-item><p>Review study</p></list-item><list-item><p>Original research article 
study</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>&#x2014;</p></list-item></list></td></tr><tr><td align="left" valign="top">Other</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Studies must report data from one or more of the following countries: the United States, Canada, South Korea, South Africa, Japan, Australia, France, Germany, Italy, Spain, Brazil, and member countries of the United Kingdom</p></list-item><list-item><p>English language only</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Studies in a language other than English</p></list-item><list-item><p>Duplicate reports (eg, conference abstracts that report on data that is subsequently published)</p></list-item></list></td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>Even if pneumococcal disease was not confirmed, studies reporting at least one of these outcomes were considered eligible.</p></fn><fn id="table1fn2"><p><sup>b</sup>Not applicable.</p></fn></table-wrap-foot></table-wrap><p>In step 3, abstract screening is performed using the GPT-4 Turbo model based on manual input of PICO-based criteria, the title, abstract text, additional domain information, and general screening instructions. In this evaluation, screening instructions were designed to be inclusive to favor a high recall of potentially relevant abstracts, minimizing erroneous exclusions during this initial screening. 
After processing the prompt instructions, ISLaR 2.0 outputs its decision to include or exclude the abstract and, consistent with the system&#x2019;s human-in-the-loop approach, generates a list of reasons for excluding abstracts, enabling user acceptance or rejection of the system recommendation (<xref ref-type="fig" rid="figure2">Figure 2</xref>).</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>System interface showing large language model&#x2013;recommended screening decisions.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v5i1e81049_fig02.png"/></fig><p>Step 4 entails full-text screening, in which PICO-based criteria, additional domain information, general screening instructions, and the full text of the article are input into the system prompt and processed via the GPT-4 Turbo model. In this study, we used the Amazon Textract tool [<xref ref-type="bibr" rid="ref21">21</xref>] to convert full-text publications from PDF into text format, facilitating their input into the system prompts for screening and data extraction. To ensure high precision at this stage, we also used stricter screening instructions than those used for abstract screening, namely, all inclusion criteria had to match, and none of the exclusion criteria were allowed to match (Table S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p><p>Step 5 entails the extraction of user-defined data elements from articles marked for inclusion after full-text screening (<xref ref-type="table" rid="table2">Table 2</xref>). These data elements and their descriptions are input into the GPT-4 Turbo model prompts in 2 formats, one for the main text of the articles and the other for tabular data. Extracting data from full-text articles involves 2 sets of prompts, one for study details and the other for study outcomes. 
The study details prompt extracts data relevant to the overall study, including the data collection period, study type, country or continent, study objectives, study populations, sample size, age (number), age unit (eg, years or months), and gender distribution (<xref ref-type="fig" rid="figure3">Figure 3</xref>). For each element of the study details prompt, the prompt needs to identify 3 attributes: the element name, its value, and the evidence text span from the full-text article. The study outcomes prompt extracts economic burden outcomes (ie, direct costs, indirect costs, societal costs, and health care resource use) specific to each study cohort. Thus, the study outcomes prompt requires identification of 4 attributes: study cohort, element, value, and the evidence text span from the full-text article. For instance, in an article included in this study the element &#x201C;Resource use&#x201D; had an extracted value of &#x201C;652 cases reduced; 23 deaths reduced; NNV=194&#x201D; with the associated study cohort of &#x201C;US black population cohort&#x201D;; the evidence text span was identified as: &#x201C;In cohorts of 50-year-olds over their remaining lifetime, the strategies with the greatest public health impact, compared with no vaccination when PPSV was assumed to be ineffective against NBP, reduced PD cases by 652 (number needed to vaccinate [NNV] to prevent 1 case=194) and deaths by 23 among a cohort of 549,197 blacks.&#x201D;</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Description of data elements.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Element category and study detail</td><td align="left" valign="bottom">Description</td></tr></thead><tbody><tr><td align="left" valign="top">Data collection period</td><td align="left" valign="top">The time frame of data collection</td></tr><tr><td align="left" valign="top">Study type</td><td align="left" valign="top">The 
type of study design (eg, randomized controlled trial study, observational study, cross-sectional study, database claims analysis, and cost-effectiveness study)</td></tr><tr><td align="left" valign="top">Country or continent</td><td align="left" valign="top">The country and/or the continent associated with the study</td></tr><tr><td align="left" valign="top">Study objective</td><td align="left" valign="top">Overview of study details, such as the objective outlined in the article</td></tr><tr><td align="left" valign="top">Study populations</td><td align="left" valign="top">Population with pneumococcal disease and possible high-risk conditions (eg, pneumococcal disease without high risk, cancer, immunocompromising conditions, HIV, renal disease, asplenia, diabetes, heart conditions, lung conditions, sickle cell disease, cochlear implants, and cerebrospinal fluid leaks)</td></tr><tr><td align="left" valign="top">Sample size</td><td align="left" valign="top">The number of participants included in the study</td></tr><tr><td align="left" valign="top">Age measure</td><td align="left" valign="top">The measure used to report age (eg, mean and median)</td></tr><tr><td align="left" valign="top">Age unit</td><td align="left" valign="top">The unit of measure used to report age (eg, years)</td></tr><tr><td align="left" valign="top">Gender distribution</td><td align="left" valign="top">The gender distribution of study participants</td></tr><tr><td align="left" valign="top">Direct costs</td><td align="left" valign="top">The cost of direct patient care for pneumococcal disease, including the units of measure (eg, &#x201C;mean cost per year,&#x201D; &#x201C;median cost per patient,&#x201D; and &#x201C;total cost for a cohort&#x201D;)</td></tr><tr><td align="left" valign="top">Indirect costs</td><td align="left" valign="top">Short- and long-term lost productivity associated with pneumococcal disease</td></tr><tr><td align="left" valign="top">Societal costs</td><td align="left" 
valign="top">The total cost to a society resulting from pneumococcal disease (eg, mortality-related costs)</td></tr><tr><td align="left" valign="top">Resource use</td><td align="left" valign="top">Health care resource use related to pneumococcal disease, including care costs, numbers of inpatient and outpatient visits, inpatient length of stay, percent reductions in inpatient visits, the number of medical tests and procedures, and specification of the associated time of health care resource use</td></tr></tbody></table></table-wrap><p>To facilitate generation of input data for the study outcomes prompt, study cohorts are first identified before constructing the prompt using a separate prompt with the following instructions:</p><disp-quote><p>Extract detailed names or descriptions of all cohorts, sub-cohorts, sub-groups, and study arms mentioned in the following article.</p></disp-quote><p>Subsequently, cohort specifications are input into the outcome extraction prompt with the following instructions:</p><disp-quote><p>We aim to extract all relevant information related to economic burden from the following article (full text). The following are some information categories or data elements we are interested in: &#x003C;list of all data elements with their descriptions followed by detailed instructions and article full text&#x003E;.</p></disp-quote><p>In addition to the attributes defined for each data element, additional domain knowledge may be input into data extraction prompts. During data extraction, the system highlights the relevant section of text and the extracted value and allows the user to directly edit any extracted values, in case errors have occurred (<xref ref-type="fig" rid="figure3">Figure 3</xref>).</p><p>In this study, data extraction prompts were developed and optimized using 5 articles on the economic burden of PD randomly selected from those identified in the PubMed database search (step 1). 
These 5 studies were not among those included in the performance evaluations, as described in the next section. Prompts for extracting economic burden outcomes were initially constructed in ISLaR 2.0 using the GPT-4 Turbo model. However, the GPT-4 Turbo model showed suboptimal performance for extracting tabular data due to errors in automatically converting long and multiheader tables from PDFs into text format. To address this issue, the GPT-4o model was tested for its image-processing capabilities, wherein each table was fed into the model as an image and prompted to extract and organize economic cost information. Information identified in tables was organized by attributes such as study cohort, element, or value.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>System interface showing large language model&#x2013;extracted data elements and values evidenced by the highlighted text spans in an article.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v5i1e81049_fig03.png"/></fig></sec><sec id="s2-3"><title>Evaluation</title><p>To evaluate the system performance, we selected 50 expert-curated articles from the pool of 108 retrieved articles. A gold standard was established through expert manual review following a focused calibration period using articles not included in the study pool. Calibration used a scope-boxed collaborative approach and continued until all experts agreed that bias had been substantially mitigated and screening decisions were aligned to ensure consistency. A senior physician with prior SLR experience then independently screened all abstracts and included full-text articles. For cases in which inclusion or exclusion decisions were ambiguous, consensus discussions were conducted among a multidisciplinary team comprising one physician and 3 PhD-level researchers to resolve disagreements and minimize potential bias. 
For abstract screening, the evaluation dataset consisted of 50 expert-curated articles, which served as the gold standard. For full-text screening, the evaluation dataset consisted of 23 articles labeled as &#x201C;relevant&#x201D; in human screening of the abstracts of the original 50 selected articles. Data extraction was evaluated using 19 articles labeled as &#x201C;relevant&#x201D; in human screening of full-text articles. We calculated performance scores by comparing the system predictions against the gold standard. Screening performance was based on comparisons between the final screening decision to include or exclude the article and the corresponding gold standard classification. Data extraction performance was based on a comparison of system-extracted data elements and associated study cohort information with those of the gold standard. Quantitative evaluations of performance were conducted by calculating 4 standard evaluation metrics: accuracy, precision, recall, and <italic>F</italic><sub>1</sub>-scores. Accuracy was defined as the percentage of correct predictions made by the system out of the total number of predicted instances. Precision was defined as the percentage of correctly classified positives out of all positives predicted. Recall was defined as the percentage of correctly classified positives out of all actual positives. <italic>F</italic><sub>1</sub>-scores were calculated as the harmonic mean of precision and recall, providing a comprehensive assessment of the system performance. We also conducted a qualitative analysis of the errors made by the system to identify challenging areas in screening and data extraction. 
All annotation and evaluation procedures were conducted using fully blinded protocols, with annotators kept independent of the system development process to minimize potential bias.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Screening Performance</title><p>The system performance for abstract and full-text screening is shown in <xref ref-type="table" rid="table3">Table 3</xref>. The system achieved a recall of 95.65% for abstract screening and 89.47% for full-text screening. The <italic>F</italic><sub>1</sub>-score was 86.27% for abstract screening and 87.18% for full-text screening. Screening classification results are summarized in the confusion matrices in <xref ref-type="table" rid="table4">Table 4</xref>.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Performance metrics for abstract and full-text screening.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Module</td><td align="left" valign="bottom">Accuracy (%; 95% CI)</td><td align="left" valign="bottom">Precision (%; 95% CI)</td><td align="left" valign="bottom">Recall (%; 95% CI)</td><td align="left" valign="bottom"><italic>F</italic><sub>1</sub>-score (%)</td></tr></thead><tbody><tr><td align="left" valign="top">Abstract screening (n=50)</td><td align="left" valign="top">86.00 (74-93)</td><td align="left" valign="top">78.57 (60-90)</td><td align="left" valign="top">95.65 (79-99)</td><td align="left" valign="top">86.27</td></tr><tr><td align="left" valign="top">Full-text screening (n=23)</td><td align="left" valign="top">78.26 (58-90)</td><td align="left" valign="top">85.00 (64-95)</td><td align="left" valign="top">89.47 (69-97)</td><td align="left" valign="top">87.18</td></tr></tbody></table></table-wrap><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Confusion matrices for abstract and full-text screening.</p></caption><table 
id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom"/><td align="left" valign="bottom">System prediction: relevant</td><td align="left" valign="bottom">System prediction: irrelevant</td><td align="left" valign="bottom">System prediction: total</td></tr></thead><tbody><tr><td align="left" valign="bottom" colspan="4">Abstracts screened, n</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Gold standard: relevant</td><td align="left" valign="top">22</td><td align="left" valign="top">1</td><td align="left" valign="top">23</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Gold standard: irrelevant</td><td align="left" valign="top">6</td><td align="left" valign="top">21</td><td align="left" valign="top">27</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Gold standard: total</td><td align="left" valign="top">28</td><td align="left" valign="top">22</td><td align="left" valign="top">50</td></tr><tr><td align="left" valign="top" colspan="4">Full texts screened, n</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Gold standard: relevant</td><td align="left" valign="top">17</td><td align="left" valign="top">2</td><td align="left" valign="top">19</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Gold standard: irrelevant</td><td align="left" valign="top">3</td><td align="left" valign="top">1</td><td align="left" valign="top">4</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Gold standard: total</td><td align="left" valign="top">20</td><td align="left" valign="top">3</td><td 
align="left" valign="top">23</td></tr></tbody></table></table-wrap></sec><sec id="s3-2"><title>Data Extraction Performance</title><p>Among the 19 (100%) articles labeled as relevant in the full-text gold standard screening, 13 (68.4%) contained information on economic costs and resource use in their tables, and 6 articles (31.6%) either did not contain any tables or contained tables without relevant information. Thus, all 19 (100%) articles were used to evaluate data extraction from the main text, while 13 (68.4%) articles containing a total of 20 tables were used to evaluate extraction of tabular data.</p><p><xref ref-type="table" rid="table5">Table 5</xref> shows the system performance in extracting data elements. <italic>F</italic><sub>1</sub>-scores for GPT-4 Turbo&#x2013;based data extraction from the main text were 92.83% for identifying the study details and 79.76% for identifying economic burden elements. The <italic>F</italic><sub>1</sub>-score for GPT-4o&#x2013;based extraction of economic burden elements from tables was 94.83%. Furthermore, for the main text, we calculated the performance scores for individual attributes (ie, the associated cohort, the element, and the value) of the economic burden elements. 
The <italic>F</italic><sub>1</sub>-scores were 92.55% for identifying the study cohort, 80.00% for identifying the data element, and 84.88% for identifying the value (data not shown in the table).</p><table-wrap id="t5" position="float"><label>Table 5.</label><caption><p>Performance measures for data element extraction from the full text of 19 articles.</p></caption><table id="table5" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Data element category</td><td align="left" valign="bottom">Accuracy (%; 95% CI)</td><td align="left" valign="bottom">Precision (%; 95% CI)</td><td align="left" valign="bottom">Recall (%; 95% CI)</td><td align="left" valign="bottom"><italic>F</italic><sub>1</sub>-score (%)</td></tr></thead><tbody><tr><td align="left" valign="top">Study details from main text (GPT-4 Turbo)</td><td align="left" valign="top">87.15 (82-91)</td><td align="left" valign="top">87.13 (81-91)</td><td align="left" valign="top">99.33 (96-100)</td><td align="left" valign="top">92.83</td></tr><tr><td align="left" valign="top">Economic burden outcomes from tables (GPT-4o)</td><td align="left" valign="top">90.17 (87-92)</td><td align="left" valign="top">90.67 (88-93)</td><td align="left" valign="top">99.39 (98-100)</td><td align="left" valign="top">94.83</td></tr><tr><td align="left" valign="top">Economic burden outcomes from main text (GPT-4 Turbo)</td><td align="left" valign="top">66.34 (57-75)</td><td align="left" valign="top">74.44 (65-82)</td><td align="left" valign="top">85.90 (77-92)</td><td align="left" valign="top">79.76</td></tr></tbody></table></table-wrap></sec><sec id="s3-3"><title>Error Analysis</title><p>A manual review of system errors in screening identified reasons for false positive and false negative errors. False positive errors in abstract screening occurred when the system incorrectly included articles focused on clinical outcomes rather than economic burden outcomes. 
False positive errors in full-text screening occurred when the system misidentified criteria for high-risk populations and age groups. For example, the system included an article regarding nonimmunocompromised older adults, which was not a high-risk population as defined in the PICO criteria (<xref ref-type="table" rid="table1">Table 1</xref>). False negative errors in full-text screening occurred when the system excluded articles based on an incorrect interpretation of the study design criteria (eg, the system excluded a meta-analysis study).</p><p>In total, 22 errors were made during the system extraction of study detail data from the main text of the 19 articles (<xref ref-type="table" rid="table6">Table 6</xref>). These errors involved misinterpretation of the study type, failure to identify age units, and incomplete extraction of information on the data collection period and sample size. A total of 23 false positive errors and 11 false negative errors occurred (<xref ref-type="table" rid="table7">Table 7</xref>) with the GPT-4 Turbo&#x2013;based extraction of economic burden data from the main text of 19 articles. False positive errors involved misclassifying the burden elements and inaccurately capturing burden values, whereas false negative errors mostly involved failing to identify some indirect costs. One false positive error was linked to both element misclassification and inaccurate cohort identification. The GPT-4o model performed better than the GPT-4 Turbo model in extracting tabular economic burden data. Two false positive errors and 3 false negatives were observed (not shown in the table) when using GPT-4o to extract economic burden data from 20 tables in 13 articles. The false positive errors occurred in 2 tables, from which cohort information was partially captured (eg, missing additional information such as &#x201C;initial CD4 count of 350 cells/mm<sup>3</sup>&#x201D; and &#x201C;100,000 HIV-infected 30-year-old patients&#x201D;). 
Furthermore, in one table, an element description was missing an important detail (&#x201C;related to influenza vaccination&#x201D;). All false negative errors in GPT-4o&#x2013;based data extraction were associated with the system being unable to identify the value in the first cell of the first row of the table.</p><table-wrap id="t6" position="float"><label>Table 6.</label><caption><p>The top 3 incorrectly extracted study detail data elements (n=22).</p></caption><table id="table6" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Element name</td><td align="left" valign="bottom">Error prevalence, n (%)</td><td align="left" valign="bottom">Example</td></tr></thead><tbody><tr><td align="left" valign="top">Study type</td><td align="left" valign="top">6 (27.3)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>The system predicted a real-world data analysis study to be an observational study [<xref ref-type="bibr" rid="ref27">27</xref>]</p></list-item></list></td></tr><tr><td align="left" valign="top">Age unit</td><td align="left" valign="top">5 (22.7)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>The system failed to identify &#x201C;years&#x201D; for a study involving younger and older adults [<xref ref-type="bibr" rid="ref28">28</xref>]</p></list-item></list></td></tr><tr><td align="left" valign="top">Data collection period and sample size</td><td align="left" valign="top">3 (13.6)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Data collection period: the system extracted &#x201C;January 1, 2000, to July 5, 2010,&#x201D; which was partially correct but missed an important detail about search update: &#x201C;The search was updated monthly through the AutoAlert function of the search up to January 31, 2011&#x201D;</p></list-item><list-item><p>Sample size: the system identified &#x201C;100,000&#x201D;; however, the complete information should have been &#x201C;a 
hypothetical cohort of 100,000 50-year-old adults [<xref ref-type="bibr" rid="ref29">29</xref>]&#x201D;</p></list-item></list></td></tr></tbody></table></table-wrap><table-wrap id="t7" position="float"><label>Table 7.</label><caption><p>Errors in GPT-4 Turbo&#x2013;based extraction of economic burden data from the main text.</p></caption><table id="table7" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Error categories</td><td align="left" valign="bottom">Error prevalence, n (%)</td><td align="left" valign="bottom">Example</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="3">False positive errors<sup><xref ref-type="table-fn" rid="table7fn1">a</xref></sup> (n=23)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Elements misclassified</td><td align="left" valign="top">11 (47.8)</td><td align="left" valign="top">The system mistakenly labeled the societal costs as direct costs for the article text stating &#x201C;...a single dose PCV13 strategy costs $70,937 per quality adjusted life year (QALY) gained compared to no vaccination [<xref ref-type="bibr" rid="ref30">30</xref>]&#x201D;</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Economic burden values not extracted meaningfully</td><td align="left" valign="top">5 (21.7)</td><td align="left" valign="top">The system extracted &#x201C;$13.9 million for at-risk persons&#x201D; as direct costs associated with annual cost of all-cause pneumonia; however, it missed an important detail &#x201C;per 100,000 persons&#x201D; [<xref ref-type="bibr" rid="ref31">31</xref>]</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Element lacked context</td><td align="left" valign="top">3 (13)</td><td align="left" valign="top">The system extracted &#x201C;Direct 
costs&#x201D; instead of &#x201C;Direct costs - mean cost of hospital stay for ICU-admitted patients&#x201D; [<xref ref-type="bibr" rid="ref32">32</xref>]</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Extraneous elements identified</td><td align="left" valign="top">3 (13)</td><td align="left" valign="top">The system extracted &#x201C;Costs of vaccine, program development, and side effect treatment&#x201D; as a direct cost, although this information was not requested [<xref ref-type="bibr" rid="ref33">33</xref>]</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Study cohort identified inaccurately</td><td align="left" valign="top">2 (8.7)</td><td align="left" valign="top">The system predicted &#x201C;Entire cohort&#x201D; instead of &#x201C;Vaccinated cohort&#x201D; for a societal cost element [<xref ref-type="bibr" rid="ref34">34</xref>]</td></tr><tr><td align="left" valign="top" colspan="3">False negative errors (n=11)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Indirect costs</td><td align="left" valign="top">7 (63.6)</td><td align="left" valign="top">Vaccination cost and incremental cost-effectiveness ratio (ICER) [<xref ref-type="bibr" rid="ref35">35</xref>]</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Direct costs</td><td align="left" valign="top">2 (18.2)</td><td align="left" valign="top">Total cost of vaccination plus treating pneumonia [<xref ref-type="bibr" rid="ref34">34</xref>]</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Societal costs</td><td align="left" valign="top">1 (9.1)</td><td align="left" valign="top">Total projected cost of treating 
pneumonia [<xref ref-type="bibr" rid="ref34">34</xref>]</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Resource use</td><td align="left" valign="top">1 (9.1)</td><td align="left" valign="top"><italic>Streptococcus pneumoniae</italic> was associated with the largest burden for adults [<xref ref-type="bibr" rid="ref36">36</xref>]</td></tr></tbody></table><table-wrap-foot><fn id="table7fn1"><p><sup>a</sup>The numerators for false positive errors add up to 24 rather than 23 because 1 error involved both element misclassification and inaccurate cohort identification.</p></fn></table-wrap-foot></table-wrap></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>We developed an LLM-based intelligent SLR platform, ISLaR 2.0, and evaluated the platform&#x2019;s performance in completing complex tasks involved in developing SLRs for economic burden studies. While previous research has primarily focused on specific aspects of the SLR process, such as eligible article screening [<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref10">10</xref>], PICO element extraction [<xref ref-type="bibr" rid="ref37">37</xref>], or data extraction in small studies as a proof of concept [<xref ref-type="bibr" rid="ref11">11</xref>], our ISLaR 2.0 platform automated the entire SLR workflow. 
This included comprehensive screening of eligible articles based on abstracts and full-text review, as well as extraction of data from full-text articles.</p><p>Several existing SLR automation tools have been developed [<xref ref-type="bibr" rid="ref38">38</xref>], including Trialstreamer [<xref ref-type="bibr" rid="ref39">39</xref>], SWIFT-Review [<xref ref-type="bibr" rid="ref40">40</xref>], DistillerSR [<xref ref-type="bibr" rid="ref41">41</xref>], NestedKnowledge [<xref ref-type="bibr" rid="ref42">42</xref>], SWIFT-ActiveScreener [<xref ref-type="bibr" rid="ref43">43</xref>], Abstrackr [<xref ref-type="bibr" rid="ref44">44</xref>], EPPI-Reviewer [<xref ref-type="bibr" rid="ref45">45</xref>], and RobotReviewer [<xref ref-type="bibr" rid="ref46">46</xref>]. Tools such as Trialstreamer and SWIFT-Review primarily focus on expediting the scoping process prior to initiating a living systematic review by identifying studies most likely to be relevant or of higher quality, often with minimal user input. DistillerSR, NestedKnowledge, Abstrackr, and EPPI-Reviewer use active learning to screen and reorder references, thereby prioritizing those likely to be relevant for human review. SWIFT-ActiveScreener further assists reviewers by estimating screening completeness and notifying them when manual screening may be stopped early. With respect to data extraction, relatively few tools have automated this component. RobotReviewer and Trialstreamer, for example, implement semiautomated methods for extracting data from eligible articles. In contrast, our study introduces an end-to-end AI-assisted SLR pipeline that enables both abstract and full-text screening with supporting explanations, as well as comprehensive data extraction, all guided by user feedback and preferences. The human-in-the-loop interface provides a comprehensive view of each abstract, along with an AI-recommended disposition and supporting rationale. 
The rationale includes details for each relevant exclusion criterion, as well as an overarching AI explanation. This approach gives human users maximum control over inclusion and exclusion decisions while streamlining the assessment and disposition process.</p><p>Wang et al [<xref ref-type="bibr" rid="ref12">12</xref>] made strides in accelerating the SLR process by establishing an LLM-based pipeline focused on oncology therapy studies. In this study, we developed a generalizable LLM-assisted platform that can be easily adapted to different topics and outcome data elements. To evaluate the effectiveness of ISLaR 2.0, we applied it to a use case involving the literature on the economic burden of PD, allowing assessment of the platform&#x2019;s capabilities regarding health economic studies. Our experience suggests that ISLaR 2.0 has the potential to enable a small team of researchers to perform all steps of an SLR in a shorter time frame than a conventional SLR, although the speed and efficiency of the system should be formally evaluated in further studies.</p><p>Findings of this study showed that ISLaR 2.0 had high recall during abstract screening and high precision during full-text screening (&#x2265;85%), indicating its robustness at both stages in identifying eligible articles. The system also exhibited strong performance in extracting information on study characteristics and capturing important aspects of economic burden outcomes. While developing ISLaR 2.0, we noted challenges in extracting tabular data using the GPT-4 Turbo model. Although the system&#x2019;s GPT-4 Turbo prompts worked well for extracting numeric cost values associated with the burden outcomes, they often lacked accuracy in extracting the appropriate study cohort and units in the burden element values. This could have been due to the complex and varied structure of the tables or difficulties in converting tables from PDF documents to text. 
Our experiments with the GPT-4o model using table images yielded a substantially improved performance, including an evaluation <italic>F</italic><sub>1</sub>-score of 94.83%, demonstrating the system&#x2019;s capability to capture most of the relevant tabular data. Further research should be conducted to examine the scalability of this approach.</p><p>In the future, we aim to further improve the ISLaR system by evaluating prompt refinement to enable more accurate extraction of study detail elements and outcomes. For economic burden studies, this could include providing a more precise interpretation of societal costs, adding descriptions of how cost categories differ from one another, and providing example scenarios in the prompts. Accuracy during data extraction may also be improved by defining and incorporating examples of common concepts related to each cost category. Notably, ISLaR 2.0 does not extract information from supplementary data in articles, which often include relevant economic burden&#x2013;related details. Thus, we intend to expand the system&#x2019;s capacity to include a review of supplementary data. In addition, while prior work has demonstrated substantial time savings associated with the ISLaR platform [<xref ref-type="bibr" rid="ref47">47</xref>], this study did not formally quantify reviewer time-on-task or implementation costs, which we hope to examine further in future evaluations. The inclusion criteria may also have biased cost estimates toward high-income settings, potentially limiting the generalizability of the findings to lower- and middle-income countries. Similarly, this study was conducted using a limited corpus within a defined disease area, which may restrict the generalizability of the findings. Although appropriate for a feasibility assessment, future research should evaluate the performance of the proposed workflow across larger and more diverse datasets spanning multiple disease domains. 
Additionally, further investigation may be warranted to determine whether hardware-specific optimizations influence the performance or efficiency of locally deployed LLMs used for systematic literature screening. Looking ahead, we aim to examine the applicability of this SLR system to other topics (eg, clinical trials, public health, and epidemiology) to better assess its generalizability.</p></sec><sec id="s4-2"><title>Conclusions</title><p>This study demonstrated how the capabilities of an advanced LLM can be harnessed to conduct rigorous SLR tasks involved in the analysis of the economic burden of PD. With appropriate human-in-the-loop review and oversight, ISLaR 2.0 may substantially reduce human resource requirements for SLR development by decreasing time and cost. Moreover, by providing an end-to-end solution for SLR development that accurately identifies and extracts relevant information from a wide range of study types, our AI system may accelerate evidence generation for the economic burden of diseases and other research areas.</p></sec></sec></body><back><ack><p>The authors would like to thank Ingrid Peterson, PhD, in collaboration with ScribCo, for medical editing services.</p></ack><notes><sec><title>Funding</title><p>Funding for this research was provided by Merck Sharp &#x0026; Dohme LLC, a subsidiary of Merck &#x0026; Co, Inc, Rahway, NJ, USA.</p></sec><sec><title>Data Availability</title><p>Data sharing is not applicable to this article as no datasets were generated or analyzed during this study.</p></sec></notes><fn-group><fn fn-type="con"><p>Conceptualization: DW, JZ, MR-M, NC</p><p>Data curation: DW, SD, JG, MR-M, NC</p><p>Formal analysis: DW, SD, JG, KL, HP, MR-M, NC</p><p>Investigation: DW, SD, JG, KL, HP, YZ, YLH, MR-M, NC</p><p>Methodology: DW, SD, JG, KL, HP, JZ, YZ, YLH, MR-M, KC, XW, NC</p><p>Resources: DW, MR-M</p><p>Software: LH</p><p>Supervision: DW</p><p>Validation: DW, SD, JG, KL, HP, YZ, YLH, MR-M, KC, NC</p><p>Visualization: DW, 
SD</p><p>Writing&#x2014;original draft: DW, SD</p><p>Writing&#x2014;review and editing: DW, SD, JG, KL, HP, JZ, YZ, YLH, LH, MR-M, KC, XW, NC</p></fn><fn fn-type="conflict"><p>DW, YZ, YLH, KC, and NC are employees of Merck Sharp &#x0026; Dohme LLC, a subsidiary of Merck &#x0026; Co, Inc, Rahway, NJ, USA, who may own stock or hold stock options. SD, JG, KL, HP, LH, MR-M, and XW are employees of IMO Health, which received funding from Merck Sharp &#x0026; Dohme LLC, a subsidiary of Merck &#x0026; Co, Inc, Rahway, NJ, USA, to conduct this study. JG owns Merck stock. JZ is an employee of MSD R&#x0026;D (China) Co, Ltd, Beijing, China.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">ISLaR 2.0</term><def><p>Intelligent Systematic Literature Review</p></def></def-item><def-item><term id="abb3">LLM</term><def><p>large language model</p></def></def-item><def-item><term id="abb4">PD</term><def><p>pneumococcal disease</p></def></def-item><def-item><term id="abb5">PICO</term><def><p>population, intervention, comparison, and outcome</p></def></def-item><def-item><term id="abb6">SLR</term><def><p>systematic literature review</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Page</surname><given-names>MJ</given-names> </name><name name-style="western"><surname>McKenzie</surname><given-names>JE</given-names> </name><name name-style="western"><surname>Bossuyt</surname><given-names>PM</given-names> </name><etal/></person-group><article-title>The PRISMA 2020 statement: an updated guideline for reporting systematic reviews</article-title><source>J Clin Epidemiol</source><year>2021</year><month>06</month><volume>134</volume><fpage>178</fpage><lpage>189</lpage><pub-id 
pub-id-type="doi">10.1016/j.jclinepi.2021.03.001</pub-id><pub-id pub-id-type="medline">33789819</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Munn</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Stern</surname><given-names>C</given-names> </name><name name-style="western"><surname>Aromataris</surname><given-names>E</given-names> </name><name name-style="western"><surname>Lockwood</surname><given-names>C</given-names> </name><name name-style="western"><surname>Jordan</surname><given-names>Z</given-names> </name></person-group><article-title>What kind of systematic review should I conduct? A proposed typology and guidance for systematic reviewers in the medical and health sciences</article-title><source>BMC Med Res Methodol</source><year>2018</year><month>01</month><day>10</day><volume>18</volume><issue>1</issue><fpage>5</fpage><pub-id pub-id-type="doi">10.1186/s12874-017-0468-4</pub-id><pub-id pub-id-type="medline">29316881</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Michelson</surname><given-names>M</given-names> </name><name name-style="western"><surname>Reuter</surname><given-names>K</given-names> </name></person-group><article-title>The significant cost of systematic reviews and meta-analyses: a call for greater involvement of machine learning to assess the promise of clinical trials</article-title><source>Contemp Clin Trials Commun</source><year>2019</year><volume>16</volume><fpage>100443</fpage><pub-id pub-id-type="doi">10.1016/j.conctc.2019.100443</pub-id><pub-id pub-id-type="medline">31497675</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Tsafnat</surname><given-names>G</given-names> </name><name name-style="western"><surname>Glasziou</surname><given-names>P</given-names> </name><name name-style="western"><surname>Choong</surname><given-names>MK</given-names> </name><name name-style="western"><surname>Dunn</surname><given-names>A</given-names> </name><name name-style="western"><surname>Galgani</surname><given-names>F</given-names> </name><name name-style="western"><surname>Coiera</surname><given-names>E</given-names> </name></person-group><article-title>Systematic review automation technologies</article-title><source>Syst Rev</source><year>2014</year><month>07</month><day>9</day><volume>3</volume><fpage>74</fpage><pub-id pub-id-type="doi">10.1186/2046-4053-3-74</pub-id><pub-id pub-id-type="medline">25005128</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bastian</surname><given-names>H</given-names> </name><name name-style="western"><surname>Glasziou</surname><given-names>P</given-names> </name><name name-style="western"><surname>Chalmers</surname><given-names>I</given-names> </name></person-group><article-title>Seventy-five trials and eleven systematic reviews a day: how will we ever keep up?</article-title><source>PLoS Med</source><year>2010</year><month>09</month><day>21</day><volume>7</volume><issue>9</issue><fpage>e1000326</fpage><pub-id pub-id-type="doi">10.1371/journal.pmed.1000326</pub-id><pub-id pub-id-type="medline">20877712</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Borah</surname><given-names>R</given-names> </name><name name-style="western"><surname>Brown</surname><given-names>AW</given-names> </name><name name-style="western"><surname>Capers</surname><given-names>PL</given-names> </name><name 
name-style="western"><surname>Kaiser</surname><given-names>KA</given-names> </name></person-group><article-title>Analysis of the time and workers needed to conduct systematic reviews of medical interventions using data from the PROSPERO registry</article-title><source>BMJ Open</source><year>2017</year><month>02</month><day>27</day><volume>7</volume><issue>2</issue><fpage>e012545</fpage><pub-id pub-id-type="doi">10.1136/bmjopen-2016-012545</pub-id><pub-id pub-id-type="medline">28242767</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>S</given-names> </name><name name-style="western"><surname>Potthast</surname><given-names>M</given-names> </name><name name-style="western"><surname>Scells</surname><given-names>H</given-names> </name><name name-style="western"><surname>Koopman</surname><given-names>B</given-names> </name><name name-style="western"><surname>Zhuang</surname><given-names>S</given-names> </name><name name-style="western"><surname>Zuccon</surname><given-names>G</given-names> </name><etal/></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Goharian</surname><given-names>N</given-names> </name><name name-style="western"><surname>Tonellotto</surname><given-names>N</given-names> </name><name name-style="western"><surname>He</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Lipani</surname><given-names>A</given-names> </name><name name-style="western"><surname>McDonald</surname><given-names>G</given-names> </name><name name-style="western"><surname>Macdonald</surname><given-names>C</given-names> </name><name name-style="western"><surname>Ounis</surname><given-names>I</given-names> </name></person-group><article-title>Zero-shot generative large language models for systematic review screening automation</article-title><source>Advances in 
Information Retrieval</source><year>2024</year><publisher-name>Springer</publisher-name><fpage>403</fpage><lpage>420</lpage></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Guo</surname><given-names>E</given-names> </name><name name-style="western"><surname>Gupta</surname><given-names>M</given-names> </name><name name-style="western"><surname>Deng</surname><given-names>J</given-names> </name><name name-style="western"><surname>Park</surname><given-names>YJ</given-names> </name><name name-style="western"><surname>Paget</surname><given-names>M</given-names> </name><name name-style="western"><surname>Naugler</surname><given-names>C</given-names> </name></person-group><article-title>Automated paper screening for clinical reviews using large language models: data analysis study</article-title><source>J Med Internet Res</source><year>2024</year><month>01</month><day>12</day><volume>26</volume><fpage>e48996</fpage><pub-id pub-id-type="doi">10.2196/48996</pub-id><pub-id pub-id-type="medline">38214966</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>M</given-names> </name><name name-style="western"><surname>Sun</surname><given-names>J</given-names> </name><name name-style="western"><surname>Tan</surname><given-names>X</given-names> </name></person-group><article-title>Evaluating the effectiveness of large language models in abstract screening: a comparative analysis</article-title><source>Syst Rev</source><year>2024</year><month>08</month><day>21</day><volume>13</volume><issue>1</issue><fpage>219</fpage><pub-id pub-id-type="doi">10.1186/s13643-024-02609-x</pub-id><pub-id pub-id-type="medline">39169386</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Khraisha</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Put</surname><given-names>S</given-names> </name><name name-style="western"><surname>Kappenberg</surname><given-names>J</given-names> </name><name name-style="western"><surname>Warraitch</surname><given-names>A</given-names> </name><name name-style="western"><surname>Hadfield</surname><given-names>K</given-names> </name></person-group><article-title>Can large language models replace humans in systematic reviews? Evaluating GPT-4&#x2019;s efficacy in screening and extracting data from peer-reviewed and grey literature in multiple languages</article-title><source>Res Synth Methods</source><year>2024</year><month>07</month><volume>15</volume><issue>4</issue><fpage>616</fpage><lpage>626</lpage><pub-id pub-id-type="doi">10.1002/jrsm.1715</pub-id><pub-id pub-id-type="medline">38484744</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gartlehner</surname><given-names>G</given-names> </name><name name-style="western"><surname>Kahwati</surname><given-names>L</given-names> </name><name name-style="western"><surname>Hilscher</surname><given-names>R</given-names> </name><etal/></person-group><article-title>Data extraction for evidence synthesis using a large language model: a proof-of-concept study</article-title><source>Res Synth Methods</source><year>2024</year><month>07</month><volume>15</volume><issue>4</issue><fpage>576</fpage><lpage>589</lpage><pub-id pub-id-type="doi">10.1002/jrsm.1710</pub-id><pub-id pub-id-type="medline">38432227</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>Z</given-names> 
</name><name name-style="western"><surname>Cao</surname><given-names>L</given-names> </name><name name-style="western"><surname>Danek</surname><given-names>B</given-names> </name><name name-style="western"><surname>Jin</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Lu</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Sun</surname><given-names>J</given-names> </name></person-group><article-title>Accelerating clinical evidence synthesis with large language models</article-title><source>NPJ Digit Med</source><year>2025</year><month>08</month><day>8</day><volume>8</volume><issue>1</issue><fpage>509</fpage><pub-id pub-id-type="doi">10.1038/s41746-025-01840-7</pub-id><pub-id pub-id-type="medline">40775042</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Du</surname><given-names>J</given-names> </name><name name-style="western"><surname>Soysal</surname><given-names>E</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>D</given-names> </name><etal/></person-group><article-title>Machine learning models for abstract screening task - a systematic literature review application for health economics and outcome research</article-title><source>BMC Med Res Methodol</source><year>2024</year><month>05</month><day>9</day><volume>24</volume><issue>1</issue><fpage>108</fpage><pub-id pub-id-type="doi">10.1186/s12874-024-02224-3</pub-id><pub-id pub-id-type="medline">38724903</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Oude Wolcherink</surname><given-names>MJ</given-names> </name><name name-style="western"><surname>Pouwels</surname><given-names>XG</given-names> </name><name name-style="western"><surname>van 
Dijk</surname><given-names>SH</given-names> </name><name name-style="western"><surname>Doggen</surname><given-names>CJ</given-names> </name><name name-style="western"><surname>Koffijberg</surname><given-names>H</given-names> </name></person-group><article-title>Can artificial intelligence separate the wheat from the chaff in systematic reviews of health economic articles?</article-title><source>Expert Rev Pharmacoecon Outcomes Res</source><year>2023</year><volume>23</volume><issue>9</issue><fpage>1049</fpage><lpage>1056</lpage><pub-id pub-id-type="doi">10.1080/14737167.2023.2234639</pub-id><pub-id pub-id-type="medline">37573521</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Manion</surname><given-names>FJ</given-names> </name><name name-style="western"><surname>Du</surname><given-names>J</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>D</given-names> </name><etal/></person-group><article-title>Accelerating evidence synthesis in observational studies: development of a living natural language processing-assisted intelligent systematic literature review system</article-title><source>JMIR Med Inform</source><year>2024</year><month>10</month><day>23</day><volume>12</volume><fpage>e54653</fpage><pub-id pub-id-type="doi">10.2196/54653</pub-id><pub-id pub-id-type="medline">39441204</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wantuch</surname><given-names>PL</given-names> </name><name name-style="western"><surname>Avci</surname><given-names>FY</given-names> </name></person-group><article-title>Invasive pneumococcal disease in relation to vaccine type serotypes</article-title><source>Hum Vaccin 
Immunother</source><year>2019</year><volume>15</volume><issue>4</issue><fpage>874</fpage><lpage>875</lpage><pub-id pub-id-type="doi">10.1080/21645515.2018.1564444</pub-id><pub-id pub-id-type="medline">30668209</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Musher</surname><given-names>DM</given-names> </name><name name-style="western"><surname>Anderson</surname><given-names>R</given-names> </name><name name-style="western"><surname>Feldman</surname><given-names>C</given-names> </name></person-group><article-title>The remarkable history of pneumococcal vaccination: an ongoing challenge</article-title><source>Pneumonia (Nathan)</source><year>2022</year><month>09</month><day>25</day><volume>14</volume><issue>1</issue><fpage>5</fpage><pub-id pub-id-type="doi">10.1186/s41479-022-00097-y</pub-id><pub-id pub-id-type="medline">36153636</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Micoli</surname><given-names>F</given-names> </name><name name-style="western"><surname>Romano</surname><given-names>MR</given-names> </name><name name-style="western"><surname>Carboni</surname><given-names>F</given-names> </name><name name-style="western"><surname>Adamo</surname><given-names>R</given-names> </name><name name-style="western"><surname>Berti</surname><given-names>F</given-names> </name></person-group><article-title>Strengths and weaknesses of pneumococcal conjugate vaccines</article-title><source>Glycoconj J</source><year>2023</year><month>04</month><volume>40</volume><issue>2</issue><fpage>135</fpage><lpage>148</lpage><pub-id pub-id-type="doi">10.1007/s10719-023-10100-3</pub-id><pub-id pub-id-type="medline">36652051</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tong</surname><given-names>S</given-names> </name><name name-style="western"><surname>Amand</surname><given-names>C</given-names> </name><name name-style="western"><surname>Kieffer</surname><given-names>A</given-names> </name><name name-style="western"><surname>Kyaw</surname><given-names>MH</given-names> </name></person-group><article-title>Trends in healthcare utilization and costs associated with pneumonia in the United States during 2008-2014</article-title><source>BMC Health Serv Res</source><year>2018</year><month>09</month><day>14</day><volume>18</volume><issue>1</issue><fpage>715</fpage><pub-id pub-id-type="doi">10.1186/s12913-018-3529-4</pub-id><pub-id pub-id-type="medline">30217156</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ouldali</surname><given-names>N</given-names> </name><name name-style="western"><surname>Levy</surname><given-names>C</given-names> </name><name name-style="western"><surname>Minodier</surname><given-names>P</given-names> </name><etal/></person-group><article-title>Long-term association of 13-valent pneumococcal conjugate vaccine implementation with rates of community-acquired pneumonia in children</article-title><source>JAMA Pediatr</source><year>2019</year><month>04</month><day>1</day><volume>173</volume><issue>4</issue><fpage>362</fpage><lpage>370</lpage><pub-id pub-id-type="doi">10.1001/jamapediatrics.2018.5273</pub-id><pub-id pub-id-type="medline">30715140</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hern&#x00E1;ndez</surname><given-names>S</given-names> </name><name name-style="western"><surname>Navas</surname><given-names>E</given-names> </name><name 
name-style="western"><surname>Aznar-Lou</surname><given-names>I</given-names> </name><etal/></person-group><article-title>Impact of the 13-valent conjugated pneumococcal vaccine on the direct costs of invasive pneumococcal disease requiring hospital admission in children aged &#x003C;5 years: a prospective study</article-title><source>Vaccines (Basel)</source><year>2020</year><month>07</month><day>15</day><volume>8</volume><issue>3</issue><fpage>387</fpage><pub-id pub-id-type="doi">10.3390/vaccines8030387</pub-id><pub-id pub-id-type="medline">32679762</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mohanty</surname><given-names>S</given-names> </name><name name-style="western"><surname>Hu</surname><given-names>T</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>G</given-names> </name><name name-style="western"><surname>Khan</surname><given-names>TK</given-names> </name><name name-style="western"><surname>Owusu-Edusei</surname><given-names>K</given-names> </name><name name-style="western"><surname>Sukarom</surname><given-names>I</given-names> </name></person-group><article-title>Health and economic burden associated with 15-valent pneumococcal conjugate vaccine serotypes in Korea and Hong Kong</article-title><source>Hum Vaccin Immunother</source><year>2022</year><month>11</month><day>30</day><volume>18</volume><issue>5</issue><fpage>2046433</fpage><pub-id pub-id-type="doi">10.1080/21645515.2022.2046433</pub-id><pub-id pub-id-type="medline">35420975</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jimenez Trujillo</surname><given-names>I</given-names> </name><name name-style="western"><surname>Lopez de Andres</surname><given-names>A</given-names> </name><name 
name-style="western"><surname>Hernandez-Barrera</surname><given-names>V</given-names> </name><name name-style="western"><surname>Martinez-Huedo</surname><given-names>MA</given-names> </name><name name-style="western"><surname>de Miguel-Diez</surname><given-names>J</given-names> </name><name name-style="western"><surname>Jimenez-Garcia</surname><given-names>R</given-names> </name></person-group><article-title>Decrease in the incidence and in hospital mortality of community-acquired pneumonia among children in Spain (2001-2014)</article-title><source>Vaccine</source><year>2017</year><month>06</month><day>27</day><volume>35</volume><issue>30</issue><fpage>3733</fpage><lpage>3740</lpage><pub-id pub-id-type="doi">10.1016/j.vaccine.2017.05.055</pub-id><pub-id pub-id-type="medline">28558982</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Luca</surname><given-names>DL</given-names> </name><name name-style="western"><surname>Kwong</surname><given-names>JC</given-names> </name><name name-style="western"><surname>Chu</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Impact of pneumococcal vaccination on pneumonia hospitalizations and related costs in Ontario: a population-based ecological study</article-title><source>Clin Infect Dis</source><year>2018</year><month>02</month><day>1</day><volume>66</volume><issue>4</issue><fpage>541</fpage><lpage>547</lpage><pub-id pub-id-type="doi">10.1093/cid/cix850</pub-id><pub-id pub-id-type="medline">29029063</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Arguedas</surname><given-names>A</given-names> </name><name name-style="western"><surname>Soley</surname><given-names>C</given-names> </name><name 
name-style="western"><surname>Abdelnour</surname><given-names>A</given-names> </name></person-group><article-title>Prevenar experience</article-title><source>Vaccine</source><year>2011</year><month>09</month><day>14</day><volume>29</volume><issue>Suppl 3</issue><fpage>C26</fpage><lpage>C34</lpage><pub-id pub-id-type="doi">10.1016/j.vaccine.2011.06.104</pub-id><pub-id pub-id-type="medline">21896350</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cill&#x00F3;niz</surname><given-names>C</given-names> </name><name name-style="western"><surname>Amaro</surname><given-names>R</given-names> </name><name name-style="western"><surname>Torres</surname><given-names>A</given-names> </name></person-group><article-title>Pneumococcal vaccination</article-title><source>Curr Opin Infect Dis</source><year>2016</year><month>04</month><volume>29</volume><issue>2</issue><fpage>187</fpage><lpage>196</lpage><pub-id pub-id-type="doi">10.1097/QCO.0000000000000246</pub-id><pub-id pub-id-type="medline">26779776</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nowalk</surname><given-names>MP</given-names> </name><name name-style="western"><surname>Wateska</surname><given-names>AR</given-names> </name><name name-style="western"><surname>Lin</surname><given-names>CJ</given-names> </name><etal/></person-group><article-title>Racial disparities in adult pneumococcal vaccination indications and pneumococcal hospitalizations in the U.S</article-title><source>J Natl Med Assoc</source><year>2019</year><month>10</month><volume>111</volume><issue>5</issue><fpage>540</fpage><lpage>545</lpage><pub-id pub-id-type="doi">10.1016/j.jnma.2019.04.011</pub-id><pub-id pub-id-type="medline">31171344</pub-id></nlm-citation></ref><ref 
id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wateska</surname><given-names>AR</given-names> </name><name name-style="western"><surname>Nowalk</surname><given-names>MP</given-names> </name><name name-style="western"><surname>Jalal</surname><given-names>H</given-names> </name><etal/></person-group><article-title>Is further research on adult pneumococcal vaccine uptake improvement programs worthwhile? A value of information analysis</article-title><source>Vaccine</source><year>2021</year><month>06</month><day>16</day><volume>39</volume><issue>27</issue><fpage>3608</fpage><lpage>3613</lpage><pub-id pub-id-type="doi">10.1016/j.vaccine.2021.05.037</pub-id><pub-id pub-id-type="medline">34045104</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sehatzadeh</surname><given-names>S</given-names> </name></person-group><article-title>Influenza and pneumococcal vaccinations for patients with chronic obstructive pulmonary disease (COPD): an evidence-based review</article-title><source>Ont Health Technol Assess Ser</source><year>2012</year><volume>12</volume><issue>3</issue><fpage>1</fpage><lpage>64</lpage><pub-id pub-id-type="medline">23074431</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Smith</surname><given-names>KJ</given-names> </name><name name-style="western"><surname>Nowalk</surname><given-names>MP</given-names> </name><name name-style="western"><surname>Raymund</surname><given-names>M</given-names> </name><name name-style="western"><surname>Zimmerman</surname><given-names>RK</given-names> </name></person-group><article-title>Cost-effectiveness of pneumococcal conjugate vaccination in immunocompromised 
adults</article-title><source>Vaccine</source><year>2013</year><month>08</month><day>20</day><volume>31</volume><issue>37</issue><fpage>3950</fpage><lpage>3956</lpage><pub-id pub-id-type="doi">10.1016/j.vaccine.2013.06.037</pub-id><pub-id pub-id-type="medline">23806240</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Weycker</surname><given-names>D</given-names> </name><name name-style="western"><surname>Farkouh</surname><given-names>RA</given-names> </name><name name-style="western"><surname>Strutton</surname><given-names>DR</given-names> </name><name name-style="western"><surname>Edelsberg</surname><given-names>J</given-names> </name><name name-style="western"><surname>Shea</surname><given-names>KM</given-names> </name><name name-style="western"><surname>Pelton</surname><given-names>SI</given-names> </name></person-group><article-title>Rates and costs of invasive pneumococcal disease and pneumonia in persons with underlying medical conditions</article-title><source>BMC Health Serv Res</source><year>2016</year><month>05</month><day>13</day><volume>16</volume><fpage>182</fpage><pub-id pub-id-type="doi">10.1186/s12913-016-1432-4</pub-id><pub-id pub-id-type="medline">27177430</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dupuis</surname><given-names>C</given-names> </name><name name-style="western"><surname>Sabra</surname><given-names>A</given-names> </name><name name-style="western"><surname>Patrier</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Burden of pneumococcal pneumonia requiring ICU admission in France: 1-year prognosis, resources use, and costs</article-title><source>Crit 
Care</source><year>2021</year><month>01</month><day>10</day><volume>25</volume><issue>1</issue><fpage>24</fpage><pub-id pub-id-type="doi">10.1186/s13054-020-03442-z</pub-id><pub-id pub-id-type="medline">33423691</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Patrick</surname><given-names>KM</given-names> </name><name name-style="western"><surname>Woolley</surname><given-names>FR</given-names> </name></person-group><article-title>A cost-benefit analysis of immunization for pneumococcal pneumonia</article-title><source>JAMA</source><year>1981</year><month>02</month><day>6</day><volume>245</volume><issue>5</issue><fpage>473</fpage><lpage>477</lpage><pub-id pub-id-type="doi">10.1001/jama.1981.03310300027014</pub-id><pub-id pub-id-type="medline">6779017</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gable</surname><given-names>CB</given-names> </name><name name-style="western"><surname>Holzer</surname><given-names>SS</given-names> </name><name name-style="western"><surname>Engelhart</surname><given-names>L</given-names> </name><etal/></person-group><article-title>Pneumococcal vaccine. 
Efficacy and associated cost savings</article-title><source>JAMA</source><year>1990</year><month>12</month><day>12</day><volume>264</volume><issue>22</issue><fpage>2910</fpage><lpage>2915</lpage><pub-id pub-id-type="doi">10.1001/jama.264.22.2910</pub-id><pub-id pub-id-type="medline">2232086</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>J</given-names> </name><name name-style="western"><surname>O&#x2019;Brien</surname><given-names>MA</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>HK</given-names> </name><name name-style="western"><surname>Grabenstein</surname><given-names>JD</given-names> </name><name name-style="western"><surname>Dasbach</surname><given-names>EJ</given-names> </name></person-group><article-title>Cost-effectiveness of pneumococcal vaccines for adults in the United States</article-title><source>Adv Ther</source><year>2014</year><month>04</month><volume>31</volume><issue>4</issue><fpage>392</fpage><lpage>409</lpage><pub-id pub-id-type="doi">10.1007/s12325-014-0115-y</pub-id><pub-id pub-id-type="medline">24718851</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Morbey</surname><given-names>RA</given-names> </name><name name-style="western"><surname>Elliot</surname><given-names>AJ</given-names> </name><name name-style="western"><surname>Harcourt</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Estimating the burden on general practitioner services in England from increases in respiratory disease associated with seasonal respiratory pathogen activity</article-title><source>Epidemiol 
Infect</source><year>2018</year><month>08</month><volume>146</volume><issue>11</issue><fpage>1389</fpage><lpage>1396</lpage><pub-id pub-id-type="doi">10.1017/S0950268818000262</pub-id><pub-id pub-id-type="medline">29972108</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ghosh</surname><given-names>M</given-names> </name><name name-style="western"><surname>Mukherjee</surname><given-names>S</given-names> </name><name name-style="western"><surname>Ganguly</surname><given-names>A</given-names> </name><name name-style="western"><surname>Basuchowdhuri</surname><given-names>P</given-names> </name><name name-style="western"><surname>Naskar</surname><given-names>SK</given-names> </name><name name-style="western"><surname>Ganguly</surname><given-names>D</given-names> </name></person-group><article-title>AlpaPICO: extraction of PICO frames from clinical trial documents using LLMs</article-title><source>Methods</source><year>2024</year><month>06</month><volume>226</volume><fpage>78</fpage><lpage>88</lpage><pub-id pub-id-type="doi">10.1016/j.ymeth.2024.04.005</pub-id><pub-id pub-id-type="medline">38643910</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Schmidt</surname><given-names>L</given-names> </name><name name-style="western"><surname>Sinyor</surname><given-names>M</given-names> </name><name name-style="western"><surname>Webb</surname><given-names>RT</given-names> </name><etal/></person-group><article-title>A narrative review of recent tools and innovations toward automating living systematic reviews and evidence syntheses</article-title><source>Z Evid Fortbild Qual Gesundhwes</source><year>2023</year><month>09</month><volume>181</volume><fpage>65</fpage><lpage>75</lpage><pub-id 
pub-id-type="doi">10.1016/j.zefq.2023.06.007</pub-id><pub-id pub-id-type="medline">37596160</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Marshall</surname><given-names>IJ</given-names> </name><name name-style="western"><surname>Nye</surname><given-names>B</given-names> </name><name name-style="western"><surname>Kuiper</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Trialstreamer: a living, automatically updated database of clinical trial reports</article-title><source>J Am Med Inform Assoc</source><year>2020</year><month>12</month><day>9</day><volume>27</volume><issue>12</issue><fpage>1903</fpage><lpage>1912</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocaa163</pub-id><pub-id pub-id-type="medline">32940710</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Howard</surname><given-names>BE</given-names> </name><name name-style="western"><surname>Phillips</surname><given-names>J</given-names> </name><name name-style="western"><surname>Miller</surname><given-names>K</given-names> </name><etal/></person-group><article-title>SWIFT-Review: a text-mining workbench for systematic review</article-title><source>Syst Rev</source><year>2016</year><month>05</month><day>23</day><volume>5</volume><fpage>87</fpage><pub-id pub-id-type="doi">10.1186/s13643-016-0263-z</pub-id><pub-id pub-id-type="medline">27216467</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kamra</surname><given-names>S</given-names> </name><name name-style="western"><surname>Hyderboini</surname><given-names>R</given-names> </name><name 
name-style="western"><surname>Sirumalla</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>MSR70 pilot study to evaluate efficiency of DistillerSR&#x00AE;&#x2019;s artificial intelligence (AI) tool over manual screening process in literature review</article-title><source>Value Health</source><year>2022</year><month>07</month><volume>25</volume><issue>7</issue><fpage>S532</fpage><pub-id pub-id-type="doi">10.1016/j.jval.2022.04.1277</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sauca</surname><given-names>M</given-names> </name><name name-style="western"><surname>Tarchand</surname><given-names>R</given-names> </name><name name-style="western"><surname>Kallmes</surname><given-names>K</given-names> </name></person-group><article-title>HTA361 Living systematic review (LSR) in health technology assessment (HTA): current guidance, methods, and challenges</article-title><source>Value Health</source><year>2023</year><month>12</month><volume>26</volume><issue>12</issue><fpage>S390</fpage><pub-id pub-id-type="doi">10.1016/j.jval.2023.09.2044</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Howard</surname><given-names>BE</given-names> </name><name name-style="western"><surname>Phillips</surname><given-names>J</given-names> </name><name name-style="western"><surname>Tandon</surname><given-names>A</given-names> </name><etal/></person-group><article-title>SWIFT-Active Screener: accelerated document screening through active learning and integrated recall estimation</article-title><source>Environ Int</source><year>2020</year><month>05</month><volume>138</volume><fpage>105623</fpage><pub-id pub-id-type="doi">10.1016/j.envint.2020.105623</pub-id><pub-id 
pub-id-type="medline">32203803</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Wallace</surname><given-names>BC</given-names> </name><name name-style="western"><surname>Small</surname><given-names>K</given-names> </name><name name-style="western"><surname>Brodley</surname><given-names>CE</given-names> </name><name name-style="western"><surname>Lau</surname><given-names>J</given-names> </name><name name-style="western"><surname>Trikalinos</surname><given-names>TA</given-names> </name></person-group><article-title>Deploying an interactive machine learning system in an evidence-based practice center</article-title><conf-name>IHI &#x2019;12: ACM International Health Informatics Symposium</conf-name><conf-date>Jan 28-30, 2012</conf-date><pub-id pub-id-type="doi">10.1145/2110363.2110464</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Thomas</surname><given-names>J</given-names> </name><name name-style="western"><surname>Graziosi</surname><given-names>S</given-names> </name><name name-style="western"><surname>Brunton</surname><given-names>J</given-names> </name><name name-style="western"><surname>Ghouze</surname><given-names>Z</given-names> </name><name name-style="western"><surname>O&#x2019;Driscoll</surname><given-names>P</given-names> </name><name name-style="western"><surname>Bond</surname><given-names>M</given-names> </name><etal/></person-group><article-title>EPPI-Reviewer: advanced software for systematic reviews, maps and evidence synthesis</article-title><source>EPPI Centre, UCL Social Research Institute, University College London</source><year>2023</year><access-date>2026-05-08</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://eppi.ioe.ac.uk/cms/resources/tools/eppi-reviewer-introduction">https://eppi.ioe.ac.uk/cms/resources/tools/eppi-reviewer-introduction</ext-link></comment></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Marshall</surname><given-names>IJ</given-names> </name><name name-style="western"><surname>Kuiper</surname><given-names>J</given-names> </name><name name-style="western"><surname>Wallace</surname><given-names>BC</given-names> </name></person-group><article-title>RobotReviewer: evaluation of a system for automatically assessing bias in clinical trials</article-title><source>J Am Med Inform Assoc</source><year>2016</year><month>01</month><volume>23</volume><issue>1</issue><fpage>193</fpage><lpage>201</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocv044</pub-id><pub-id pub-id-type="medline">26104742</pub-id></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cassell</surname><given-names>K</given-names> </name><name name-style="western"><surname>Ologunowa</surname><given-names>A</given-names> </name><name name-style="western"><surname>Rastegar-Mojarad</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Analysis of article screening and data extraction performance by an AI systematic literature review platform</article-title><source>Front Artif Intell</source><year>2025</year><volume>8</volume><fpage>1662202</fpage><pub-id pub-id-type="doi">10.3389/frai.2025.1662202</pub-id><pub-id pub-id-type="medline">41356671</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Prompt structure, templates, and instructions used in the large language model prompt to guide the screening processes.</p><media 
xlink:href="ai_v5i1e81049_app1.docx" xlink:title="DOCX File, 24 KB"/></supplementary-material></app-group></back></article>