<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="letter"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR AI</journal-id><journal-id journal-id-type="publisher-id">ai</journal-id><journal-id journal-id-type="index">41</journal-id><journal-title>JMIR AI</journal-title><abbrev-journal-title>JMIR AI</abbrev-journal-title><issn pub-type="epub">2817-1705</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v5i1e91981</article-id><article-id pub-id-type="doi">10.2196/91981</article-id><article-categories><subj-group subj-group-type="heading"><subject>Letter to the Editor</subject></subj-group></article-categories><title-group><article-title>Authors&#x2019; Reply: Toward Retrieval-Grounded Evaluation for Conversational Large Language Model&#x2013;Based Risk Assessment</article-title></title-group><contrib-group><contrib contrib-type="author"><name name-style="western"><surname>Roshani</surname><given-names>Mohammad Amin</given-names></name><degrees>MS</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Zhou</surname><given-names>Xiangyu</given-names></name><degrees>MS</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Qiang</surname><given-names>Yao</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Suresh</surname><given-names>Srinivasan</given-names></name><degrees>MD</degrees><xref ref-type="aff" 
rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Hicks</surname><given-names>Steve</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Sethuraman</surname><given-names>Usha</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff5">5</xref></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Zhu</surname><given-names>Dongxiao</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Computer Science, Wayne State University</institution><addr-line>5057 Woodward Ave</addr-line><addr-line>Detroit</addr-line><addr-line>MI</addr-line><country>United States</country></aff><aff id="aff2"><institution>Oakland University</institution><addr-line>Rochester</addr-line><addr-line>MI</addr-line><country>United States</country></aff><aff id="aff3"><institution>UPMC Children's Hospital of Pittsburgh</institution><addr-line>Pittsburgh</addr-line><addr-line>PA</addr-line><country>United States</country></aff><aff id="aff4"><institution>Department of Pediatrics, Pennsylvania State University</institution><addr-line>Hershey</addr-line><addr-line>PA</addr-line><country>United States</country></aff><aff id="aff5"><institution>Children's Hospital of Michigan</institution><addr-line>Detroit</addr-line><addr-line>MI</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Coristine</surname><given-names>Andrew</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Dongxiao Zhu, PhD, Department of Computer Science, Wayne State University, 5057 Woodward Ave, Detroit, MI, 48202, United States, 1 3135773104; <email>dzhu@wayne.edu</email></corresp></author-notes><pub-date 
pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>12</day><month>3</month><year>2026</year></pub-date><volume>5</volume><elocation-id>e91981</elocation-id><history><date date-type="received"><day>30</day><month>01</month><year>2026</year></date><date date-type="accepted"><day>17</day><month>02</month><year>2026</year></date></history><copyright-statement>&#x00A9; Dongxiao Zhu, Mohammad Amin Roshani, Xiangyu Zhou, Srinivasan Suresh, Yao Qiang, Steve Hicks, Usha Sethuraman. Originally published in JMIR AI (<ext-link ext-link-type="uri" xlink:href="https://ai.jmir.org">https://ai.jmir.org</ext-link>), 12.3.2026. </copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR AI, is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://www.ai.jmir.org/">https://www.ai.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://ai.jmir.org/2026/1/e91981"/><related-article related-article-type="commentary article" ext-link-type="doi" xlink:href="10.2196/67363" xlink:title="Comment on" xlink:type="simple">https://ai.jmir.org/2025/1/e67363</related-article><related-article related-article-type="commentary article" ext-link-type="doi" xlink:href="10.2196/90759" xlink:title="Comment on" xlink:type="simple">https://ai.jmir.org/2026/1/e90759</related-article><kwd-group><kwd>personalized risk assessment</kwd><kwd>large language model</kwd><kwd>artificial intelligence</kwd><kwd>conversational AI</kwd><kwd>COVID-19</kwd></kwd-group></article-meta></front><body><p>We thank the authors of the letter to the editor [<xref ref-type="bibr" rid="ref1">1</xref>] for their thoughtful perspective and for highlighting the potential value of retrieval-grounded sensitivity analyses alongside conventional area under the receiver operating characteristic curve (AUC) reporting. We welcome this opportunity to further clarify the design choices and evaluation scope of our published work.</p><p>As described in Roshani et al [<xref ref-type="bibr" rid="ref2">2</xref>], the system explicitly provides 2 distinct user-facing interfaces: a clinician-facing interface and a patient-facing interface (these are shown in Figure 3 in Roshani et al [<xref ref-type="bibr" rid="ref2">2</xref>]), each designed for a different purpose. 
We agree that subgroup-level audits are essential; accordingly, our clinician-facing interface already supports stratified performance review (eg, AUC by demographic variables), laying the groundwork for more systematic fairness analyses in future versions.</p><p>In contrast, the patient-facing interface prioritizes interpretability and usability rather than aggregate performance metrics. It presents both a binary classification (severe vs nonsevere) and an individualized continuous risk score derived from the model&#x2019;s logit output, where higher values indicate greater severity. This output is further complemented by attention-based feature importance to support transparent, conversational risk assessment. Notably, the patient-facing interface produces no free text in clinical language, so the risk of hallucinated clinical statements is not applicable to our system&#x2019;s current design.</p><p>Regarding retrieval grounding, the letter raises an important direction for future evaluation. We note that retrieval-based augmentation requires the availability of stable, domain-specific corpora. In the published study, our primary objective was to investigate the feasibility and performance of generative white-box large language models (LLMs) in low-data, emergent-disease settings. As such, we primarily considered a novel infection scenario, in which trusted external knowledge sources may be sparse, incomplete, or evolving. This was the case for pediatric COVID-19 during its early emergence, when authoritative, age-specific guidelines were limited, making LLM-only approaches more practical at the time. In this context, the system is intended to provide an initial, clinician-informed risk assessment, leveraging a small number of curated cases incorporated during fine-tuning, rather than relying on external retrieval. 
We therefore presented the reported system as an initial in-house baseline, designed to function in early-stage or data-limited settings.</p><p>As clinical knowledge bases mature, the same conversational pipeline can naturally be extended to incorporate explicit evidence grounding and source citation, enabling more personalized and evidence-supported risk assessment. We appreciate the authors&#x2019; comments in highlighting this direction and view it as complementary to, rather than in conflict with, the scope and objectives of the published work.</p></body><back><notes><sec><title>Funding</title><p>Research reported in this publication was supported by the Eunice Kennedy Shriver National Institute of Child Health and Human Development of the National Institutes of Health under awards R61HD105610 and R33HD105610.</p></sec></notes><fn-group><fn fn-type="conflict"><p>SH is named as a co-inventor on a patent for the diagnostic use of salivary RNA in neurologic disorders. He previously served as a scientific advisory board member for Quadrant Biosciences and Spectrum Solutions. 
All other authors declare no conflicts of interest.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AUC</term><def><p>area under the receiver operating characteristic curve</p></def></def-item><def-item><term id="abb2">LLM</term><def><p>large language model</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hu</surname><given-names>Y</given-names> </name></person-group><article-title>Toward retrieval-grounded evaluation for conversational large language model&#x2013;based risk assessment</article-title><source>JMIR AI</source><year>2026</year><volume>5</volume><fpage>e90759</fpage><pub-id pub-id-type="doi">10.2196/90759</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Roshani</surname><given-names>MA</given-names> </name><name name-style="western"><surname>Zhou</surname><given-names>X</given-names> </name><name name-style="western"><surname>Qiang</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Generative large language model-powered conversational AI app for personalized risk assessment: case study in COVID-19</article-title><source>JMIR AI</source><year>2025</year><month>03</month><day>27</day><volume>4</volume><fpage>e67363</fpage><pub-id pub-id-type="doi">10.2196/67363</pub-id><pub-id pub-id-type="medline">40146990</pub-id></nlm-citation></ref></ref-list></back></article>