<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR AI</journal-id><journal-id journal-id-type="publisher-id">ai</journal-id><journal-id journal-id-type="index">41</journal-id><journal-title>JMIR AI</journal-title><abbrev-journal-title>JMIR AI</abbrev-journal-title><issn pub-type="epub">2817-1705</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v4i1e76203</article-id><article-id pub-id-type="doi">10.2196/76203</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Assessing the Capability of Large Language Models for Navigation of the Australian Health Care System: Comparative Study</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Simmich</surname><given-names>Joshua</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Ross</surname><given-names>Megan Heather</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Russell</surname><given-names>Trevor Glen</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib></contrib-group><aff id="aff1"><institution>RECOVER Injury 
Research Centre, Faculty of Health, Medicine and Behavioural Sciences, The University of Queensland</institution><addr-line>288 Herston Rd, Queensland</addr-line><addr-line>Brisbane</addr-line><country>Australia</country></aff><aff id="aff2"><institution>STARS Education and Research Alliance, Surgical Treatment and Rehabilitation Service (STARS), University of Queensland and Metro North Health</institution><addr-line>Brisbane</addr-line><country>Australia</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Liu</surname><given-names>Hongfang</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Wu</surname><given-names>Chaochen</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Pushpanathan</surname><given-names>Krithi</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Joshua Simmich, PhD, RECOVER Injury Research Centre, Faculty of Health, Medicine and Behavioural Sciences, The University of Queensland, 288 Herston Rd, Queensland, Brisbane, 4029, Australia, +61 7 3365 5560; <email>uqjsimmi@uq.edu.au</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>7</day><month>10</month><year>2025</year></pub-date><volume>4</volume><elocation-id>e76203</elocation-id><history><date date-type="received"><day>18</day><month>04</month><year>2025</year></date><date date-type="rev-recd"><day>24</day><month>08</month><year>2025</year></date><date date-type="accepted"><day>27</day><month>08</month><year>2025</year></date></history><copyright-statement>&#x00A9; Joshua Simmich, Megan Heather Ross, Trevor Glen Russell. Originally published in JMIR AI (<ext-link ext-link-type="uri" xlink:href="https://ai.jmir.org">https://ai.jmir.org</ext-link>), 7.10.2025. 
</copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR AI, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://www.ai.jmir.org/">https://www.ai.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://ai.jmir.org/2025/1/e76203"/><abstract><sec><title>Background</title><p>Australians can face significant challenges in navigating the health care system, especially in rural and regional areas. Generative search tools, powered by large language models (LLMs), show promise in improving health information retrieval by generating direct answers. However, concerns remain regarding their accuracy and reliability when compared to traditional search engines in a health care context.</p></sec><sec><title>Objective</title><p>This study aimed to compare the effectiveness of a generative artificial intelligence (AI) search (ie, Microsoft Copilot) versus a conventional search engine (Google Web Search) for navigating health care information.</p></sec><sec sec-type="methods"><title>Methods</title><p>A total of 97 adults in Queensland, Australia, participated in a web-based survey, answering scenario-based health care navigation questions using either Microsoft Copilot or Google Web Search. 
Accuracy was assessed using binary correct or incorrect ratings, graded correctness (incorrect, partially correct, or correct), and numerical scores (0&#x2010;2 for service identification and 0&#x2010;6 for criteria). Participants also completed a Technology Rating Questionnaire (TRQ) to evaluate their experience with their assigned tool.</p></sec><sec sec-type="results"><title>Results</title><p>Participants assigned to Microsoft Copilot outperformed the Google Web Search group on 2 health care navigation tasks (identifying aged care application services and listing mobility allowance eligibility criteria), with no clear evidence of a difference in the remaining 6 tasks. On the TRQ, participants rated Google Web Search higher in willingness to adopt and perceived impact on quality of life, and lower in effort needed to learn. Both tools received similar ratings in perceived value, confidence, help required to use, and concerns about privacy.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>Generative AI tools can achieve comparable accuracy to traditional search engines for health care navigation tasks, though this did not translate into an improved user experience. Further evaluation is needed as AI technology improves and users become more familiar with its use.</p></sec></abstract><kwd-group><kwd>artificial intelligence</kwd><kwd>large language models</kwd><kwd>search engines</kwd><kwd>Australia</kwd><kwd>health services accessibility</kwd><kwd>questionnaires</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Health care in Australia is recognized as both comprehensive and highly complex, making it challenging for many individuals to navigate [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. 
Although the majority of Australians&#x2014;around 86%&#x2014;report navigating the system with relative ease, 14% find it difficult [<xref ref-type="bibr" rid="ref1">1</xref>]. The challenge is particularly pronounced for health services in regional, rural, and remote contexts, where lack of awareness and the complexity of accessing care are cited as major barriers [<xref ref-type="bibr" rid="ref3">3</xref>]. Moreover, rural Australians, who comprise roughly 28% of the population, experience higher rates of illness and lower per capita funding compared to their urban counterparts [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>]. These disparities underscore the importance of effective tools and strategies that can simplify health care navigation, with implications for rural and regional populations.</p><p>To ensure that studies aimed at developing effective tools and strategies for simplifying health care navigation truly address the needs of communities, it is essential to involve health consumers directly in setting research priorities [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. The research priority for this study was informed via a consumer engagement session held in Dalby, Queensland, a rural town approximately 200 kilometers west of Brisbane, on February 14, 2024. The roundtable brought together community members, regional health service providers, and researchers to discuss health care challenges specific to rural and remote populations. Participants highlighted significant barriers to accessing health care, particularly the difficulty of understanding what services are available and determining if they are eligible for them. Many described feeling uncertain about where to seek help, encountering confusing bureaucratic processes, and struggling to navigate both in-person and online health resources. 
Guided by this input, the present study focuses on health navigation challenges involving rural and remote health services in Queensland, Australia.</p><p>In recent decades, the internet has emerged as a primary source of health information for Australians of all ages, with a search engine being the most common starting point [<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref11">11</xref>]. However, individuals searching for health information online often do not engage in thorough source comparison, instead extracting information from the search results page or a single website rather than critically assessing multiple authoritative sources [<xref ref-type="bibr" rid="ref12">12</xref>]. This can lead to suboptimal decisions and increased barriers to timely, appropriate care, particularly in complex health care systems like that of Australia.</p><p>One technological innovation that may help address these challenges is the deployment of generative artificial intelligence (AI) in health information searches. Research indicates that users in Australia are already testing the waters with AI-based tools; for instance, nearly one in 10 Australians reported using ChatGPT to ask health questions in the first half of 2024 [<xref ref-type="bibr" rid="ref13">13</xref>]. Large language model (LLM)&#x2013;based systems can simplify health information so that it is more readable [<xref ref-type="bibr" rid="ref14">14</xref>], potentially improving health information seeking. Emerging research suggests that clinicians generally prefer LLM-generated responses to common patient health queries, rating them as more accurate and comprehensive than the results provided by traditional search engines [<xref ref-type="bibr" rid="ref15">15</xref>-<xref ref-type="bibr" rid="ref19">19</xref>]. 
In addition, LLMs can be interfaced with search engines to create generative AI search tools (also known as conversational search), which promise to streamline information retrieval by automating the process of selecting search terms, filtering content, and providing a readable summary of information from multiple sources [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. However, there are significant concerns about the reliability of LLMs. For instance, LLMs often &#x201C;hallucinate,&#x201D; generating information that appears plausible but is inaccurate [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref23">23</xref>]. In a health care context, where evidence-based information is critical, the consequences of these inaccuracies can be serious. Although early adopters may find these tools appealing, it is not clear whether generative AI search tools are as accurate as conventional search engines in helping Australians navigate the health care system.</p><p>The primary aim of the present study is to compare the accuracy of Australian users&#x2019; answers to health care navigation scenarios when using a generative AI search (Microsoft Copilot) versus using a conventional search engine (Google Web Search). The secondary aim is to compare the user experience of these 2 tools, in terms of factors such as perceived value, concerns about privacy, perceived effort to use, and willingness to adopt.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Participant Recruitment</title><p>We recruited participants using the web-based platform Prolific, who were then linked to Qualtrics to complete the survey. All Prolific users are required to be over 18 years of age. We only recruited those located in Australia and whose profile indicated they could speak English. 
To ensure all participants would have similar familiarity with state-based health policies and services, we screened out participants who resided in Australian states other than Queensland or had past or present employment in a health profession.</p><p>Participants were recruited between November 3, 2024, and January 8, 2025.</p></sec><sec id="s2-2"><title>Baseline Measures</title><p>Initial demographics were also collected, including age, gender (man, woman, nonbinary, or prefer not to disclose), and language proficiency. Additional data about participants&#x2019; eHealth literacy and computer skills and knowledge were collected using the eHealth Literacy Scale (eHEALS) [<xref ref-type="bibr" rid="ref24">24</xref>]. The eHEALS includes 8 questions rated on a 5-point Likert scale, ranging from 1 (strongly disagree) to 5 (strongly agree), with a total possible score between 8 and 40.</p><p>In addition, a 4-item computer skills and knowledge questionnaire was used for the purpose of assessing use of computers, knowledge, skills with programs, and skills with computer applications [<xref ref-type="bibr" rid="ref25">25</xref>]. Each item was rated on a 9-point rating scale, ranging from 1 (very low skill or knowledge) to 9 (very high skill or knowledge).</p></sec><sec id="s2-3"><title>Procedure</title><p>Participants were randomized to use either the generative AI search tool or Google Web Search. They were given simple instructions on how to access the site and interact with it; however, no guidance was provided on how best to formulate a prompt or search strategy.</p><p>Participants were tasked to complete 5 task-based scenarios, each presenting a brief vignette of a person, their location in rural or regional Queensland, their health condition, and the health service or health payment, subsidy, or scheme about which they were seeking information. 
Each scenario had 1 or 2 tasks respondents were asked to complete using their assigned tool (<xref ref-type="table" rid="table1">Table 1</xref>), for a total of 8 tasks. An example of a scenario (Scenario 3) and the 2 associated tasks is provided in <xref ref-type="other" rid="box1">Textbox 1</xref> (all scenarios and tasks available in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Although they were instructed to spend no more than five minutes per scenario, no time limits were enforced. Seven participants who did not provide a response to any of the 5 scenarios were removed from the sample.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Task-based scenarios with corresponding outcome (and associated scoring criteria).</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Scenario</td><td align="left" valign="bottom">Task</td><td align="left" valign="bottom">Outcome</td><td align="left" valign="bottom">Scoring</td></tr></thead><tbody><tr><td align="char" char="." valign="top">1</td><td align="left" valign="top">Locating telehealth physiotherapy clinics</td><td align="left" valign="top">Number of correctly identified services</td><td align="char" char="." valign="top">0&#x2010;2</td></tr><tr><td align="char" char="." valign="top">2</td><td align="left" valign="top">Locating nearby aged care providers</td><td align="left" valign="top">Number of correctly identified providers</td><td align="char" char="." valign="top">0&#x2010;2</td></tr><tr><td align="char" char="." valign="top">2</td><td align="left" valign="top">Naming aged care application service</td><td align="left" valign="top">Whether a service named is correct</td><td align="left" valign="top">Incorrect, correct</td></tr><tr><td align="char" char="." 
valign="top">3</td><td align="left" valign="top">Listing mobility allowance eligibility criteria</td><td align="left" valign="top">Number of correctly identified criteria</td><td align="char" char="." valign="top">0&#x2010;6</td></tr><tr><td align="char" char="." valign="top">3</td><td align="left" valign="top">Providing mobility allowance helpline number</td><td align="left" valign="top">Whether a phone number provided is correct</td><td align="left" valign="top">Incorrect, correct</td></tr><tr><td align="char" char="." valign="top">4</td><td align="left" valign="top">Locating nearby mental health center</td><td align="left" valign="top">Correctness of identified facility(name AND address)</td><td align="left" valign="top">Incorrect, partially correct, correct</td></tr><tr><td align="char" char="." valign="top">5</td><td align="left" valign="top">Assessing patient travel subsidy eligibility</td><td align="left" valign="top">Correctness of subsidy identified</td><td align="left" valign="top">Incorrect, partially correct, correct</td></tr><tr><td align="char" char="." valign="top">5</td><td align="left" valign="top">Providing travel subsidy agency contact details</td><td align="left" valign="top">Whether any contact details provided are correct</td><td align="left" valign="top">Incorrect, correct</td></tr></tbody></table></table-wrap><boxed-text id="box1"><title> An example of a scenario (Scenario 3) provided to participants, with the 2 tasks associated with this scenario.</title><p><bold>Scenario 3</bold></p><p>Hamid sustained a spinal injury in a motor vehicle collision 9 months ago. He now uses a wheelchair permanently for mobility. He recently moved to live with family in Charters Towers and is looking for employment opportunities. However, he finds it difficult to pay for the cost of travel to look for work as he cannot drive and there is no public transport in the area. 
He has a membership for the Queensland Government's Taxi Subsidy Scheme (TSS), but this only pays half of his taxi fares. He recently learned of a funding program called Mobility Allowance.</p><p><italic>Task 1:</italic></p><p>What information about Hamid would you need to know to determine if he is eligible for this program?</p><p><italic>Task 2:</italic></p><p>Hamid wants to talk to somebody over the telephone about getting mobility allowance, but struggles with speaking English. What phone number(s) should he call?</p></boxed-text></sec><sec id="s2-4"><title>Model Selection</title><p>To determine which generative AI search tool would be more appropriate for the current study, the accuracy of various free-access and subscription-only generative AI search tools was assessed. In September 2024, using the same prompts as detailed above, each answer engine was prompted 3 times. Each trial was conducted in an independent session (ie, a new chat was started each time), with any &#x2019;memory&#x2019; features across chats disabled where applicable. The responses were scored by one author (JS), using the same scoring metric as the primary outcome measure, and scores were discussed with the research team. An equal-weighted total average score for each model was calculated by assigning numeric scores to all binary or ordinal scores (eg, Incorrect=0, Partially correct=0.5, Correct=1), weighting each of the 5 scenarios equally. By this metric, all subscription-only generative AI search tools available at the time were substantially more accurate than the free-access versions available at the time (<xref ref-type="fig" rid="figure1">Figure 1</xref>). However, it was not feasible to supply all survey participants with access to a paid subscription. 
Of the free-access tools available at the time, Microsoft Copilot (formally Bing Chat) was chosen due to not requiring user accounts to be created for access, which streamlined the survey process and decreased the burden on participants. For additional context, newer free models released after the study began were tested in February 2025 and are also shown in <xref ref-type="fig" rid="figure1">Figure 1</xref>.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Chart of mean accuracy of the candidate free-access and subscription-only generative AI search tools across 3 trials. Results for each trial indicated by empty circles.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v4i1e76203_fig01.png"/></fig></sec><sec id="s2-5"><title>Outcome Measures</title><sec id="s2-5-1"><title>Primary Outcome Measure</title><p>The primary outcome variable was the accuracy of the response, based on whether correct information was retrieved. One author (JS) developed a scheme to score each response, which was reviewed and refined based on feedback from a second author (MHR). Two authors (JS, MHR), both registered physiotherapists in Queensland, scored each response based on these criteria and resolved any disagreements through discussion. Scoring was blinded to the search tool used to generate the answers. Points were awarded based on the presence of correct information (eg, services or payments that would address the client&#x2019;s needs, correctly identified eligibility criteria, or correct contact details). 
Points were not deducted for the presence of incorrect information in the responses, nor for providing more information than was asked for (eg, giving 3 potential services when asked for 2).
For questions with ordinal answers (eg, marked as Incorrect, Partially Correct, or Correct), ordinal regression was performed using cumulative link models with the &#x201C;ordinal&#x201D; package, with a logit link function. The proportional odds assumption was verified using Brant tests with the package &#x201C;brant.&#x201D; The secondary outcome on the technology rating questionnaire was analyzed using the same bootstrapping method as the interval scale data for the primary outcome.</p></sec><sec id="s2-7"><title>Ethical Considerations</title><p>This study was approved by The University of Queensland Faculty of Health and Behavioural Sciences Low and Negligible Risk Panel (2024/HE001343). Participants provided electronic informed consent after reviewing a participant information sheet. Recruitment occurred via a research panel (Prolific), and&#x2014;given the anonymous design&#x2014;data provided before withdrawal could not be removed. Participants were compensated for their time through Prolific, receiving an average of &#x00A3;5.50 (approximately US$6.90). We maintained participants&#x2019; privacy and confidentiality by collecting only nonidentifiable data, which were stored on The University of Queensland Research Data Management System.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Sample Characteristics</title><p>After receiving 672 submissions from Prolific, the final sample consisted of 97 participants. Most participants were from metropolitan areas (72/97, 74%). Mean age was 36 (SD 14) years with an age range from 18 to 83 years. Gender distribution included 49% (48/97) identifying as men, 42% (41/97) as women, and 3% (3/97) as nonbinary. Only 6% (6/97) of the sample reported being of Aboriginal or Torres Strait origin. 
See <xref ref-type="table" rid="table2">Table 2</xref> for full details.</p><p>The mean self-reported eHEALS of participants (mean sum score) was 27 (SD 4; min 12, max 35) out of 40, indicating most participants were confident using the internet for health information. The average rating for computer and internet skills and knowledge was around 7.5-8 (of a range of 1-9), indicating participants were very confident with the use of computers and the internet.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Participant sociodemographic and health characteristics, computer and Internet skills, and eHealth literacy scores.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Characteristic</td><td align="left" valign="bottom">Google Web Search (n=50)</td><td align="left" valign="bottom">Microsoft Copilot (n=47)</td><td align="left" valign="bottom">Overall (n=97)</td></tr></thead><tbody><tr><td align="left" valign="top">What is your age (in years)? mean (SD)</td><td align="left" valign="top">37 (14)</td><td align="left" valign="top">35 (15)</td><td align="left" valign="top">36 (14)</td></tr><tr><td align="left" valign="top" colspan="4">With which gender do you most identify? 
n (%)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Man</td><td align="left" valign="top">29 (58)</td><td align="left" valign="top">19 (40)</td><td align="left" valign="top">48 (49)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Woman</td><td align="left" valign="top">18 (36)</td><td align="left" valign="top">23 (49)</td><td align="left" valign="top">41 (42)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Nonbinary</td><td align="left" valign="top">1 (2)</td><td align="left" valign="top">2 (4)</td><td align="left" valign="top">3 (3)</td></tr><tr><td align="left" valign="top" colspan="4">Are you of Aboriginal or Torres Strait origin? n (%)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>No</td><td align="left" valign="top">45 (90)</td><td align="left" valign="top">41 (87)</td><td align="left" valign="top">86 (89)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Yes, Aboriginal</td><td align="left" valign="top">3 (6)</td><td align="left" valign="top">3 (6)</td><td align="left" valign="top">6 (6)</td></tr><tr><td align="left" valign="top" colspan="4">What is your highest level of completed education? 
n (%)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>High school (secondary school)</td><td align="left" valign="top">5 (10)</td><td align="left" valign="top">8 (17)</td><td align="left" valign="top">13 (13)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Certificate I-IV (including trade certificate)</td><td align="left" valign="top">6 (12)</td><td align="left" valign="top">6 (13)</td><td align="left" valign="top">12 (12)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Diploma (or associate degree)</td><td align="left" valign="top">4 (8)</td><td align="left" valign="top">2 (4)</td><td align="left" valign="top">6 (6)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Bachelor degree (including Bachelor Honours degrees and graduate diploma or certificate)</td><td align="left" valign="top">21 (42)</td><td align="left" valign="top">21 (45)</td><td align="left" valign="top">42 (43)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Masters degree (coursework or research)</td><td align="left" valign="top">10 (20)</td><td align="left" valign="top">3 (6)</td><td align="left" valign="top">13 (13)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Doctoral degree (eg, PhD)</td><td align="left" valign="top">1 (2)</td><td align="left" valign="top">4 (9)</td><td align="left" valign="top">5 (5)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Prefer not to say</td><td align="left" valign="top">1 (2)</td><td align="left" valign="top">0 
3 (6)</td>
(4)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">2 (2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Student, with disability and unable to work</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">1 (2)</td><td align="left" valign="top">1 (1)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Self-employed</td><td align="left" valign="top">1 (2)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">1 (1)</td></tr><tr><td align="left" valign="top" colspan="4">Geographic remoteness (Modified Monash Model [MMM] 2019), n (%)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>MMM 1: Metropolitan areas in major cities</td><td align="left" valign="top">39 (78)</td><td align="left" valign="top">33 (70)</td><td align="left" valign="top">72 (74)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>MMM 2: Regional centers (populations &#x003E;50,000)</td><td align="left" valign="top">2 (4)</td><td align="left" valign="top">6 (13)</td><td align="left" valign="top">8 (8)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>MMM 3: large rural towns (15,000&#x2013;50,000)</td><td align="left" valign="top">1 (2)</td><td align="left" valign="top">1 (2)</td><td align="left" valign="top">2 (2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>MMM 4: medium rural towns (5000&#x2013;15,000)</td><td align="left" valign="top">1 (2)</td><td align="left" valign="top">1 (2)</td><td align="left" valign="top">2 (2)</td></tr><tr><td align="left" 
valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>MMM 5: small rural towns (&#x003C;5000)</td><td align="left" valign="top">4 (8)</td><td align="left" valign="top">2 (4)</td><td align="left" valign="top">6 (6)</td></tr><tr><td align="left" valign="top" colspan="4">Do you have a chronic health condition? n (%)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>No</td><td align="left" valign="top">32 (64)</td><td align="left" valign="top">31 (66)</td><td align="left" valign="top">63 (65)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Prefer not to say</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">2 (4)</td><td align="left" valign="top">2 (2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Yes, 1 chronic health condition</td><td align="left" valign="top">12 (24)</td><td align="left" valign="top">7 (15)</td><td align="left" valign="top">19 (20)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Yes, several chronic health conditions</td><td align="left" valign="top">4 (8)</td><td align="left" valign="top">3 (6)</td><td align="left" valign="top">7 (7)</td></tr><tr><td align="left" valign="top" colspan="4">Do you use a language other than English at home? 
n (%)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>No, English only</td><td align="left" valign="top">45 (90)</td><td align="left" valign="top">36 (77)</td><td align="left" valign="top">81 (84)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Yes, use another language at home</td><td align="left" valign="top">3 (6)</td><td align="left" valign="top">8 (17)</td><td align="left" valign="top">11 (11)</td></tr><tr><td align="left" valign="top" colspan="4">How well do you speak English? n (%)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Very well</td><td align="left" valign="top">47 (94)</td><td align="left" valign="top">42 (89)</td><td align="left" valign="top">89 (92)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Well</td><td align="left" valign="top">1 (2)</td><td align="left" valign="top">2 (4)</td><td align="left" valign="top">3 (3)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Not very well</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">0 (0)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Not at all</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">0 (0)</td></tr><tr><td align="left" valign="top">eHealth Literacy Score (eHEALS), mean (SD)</td><td align="left" valign="top">27 (4)</td><td align="left" valign="top">27 (4)</td><td align="left" valign="top">27 (4)</td></tr><tr><td align="left" valign="top" colspan="4">Computer and Internet skills and knowledge 
(1 to 9 scale), mean (SD)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Basic computer skill</td><td align="left" valign="top">8.1 (1.1)</td><td align="left" valign="top">8.3 (1.0)</td><td align="left" valign="top">8.2 (1.0)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Internet and email skill or knowledge</td><td align="left" valign="top">8.0 (1.0)</td><td align="left" valign="top">8.1 (1.0)</td><td align="left" valign="top">8.1 (1.0)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Computer programs knowledge</td><td align="left" valign="top">7.6 (1.3)</td><td align="left" valign="top">7.6 (1.3)</td><td align="left" valign="top">7.6 (1.3)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Computer applications knowledge</td><td align="left" valign="top">7.7 (1.3)</td><td align="left" valign="top">7.8 (1.1)</td><td align="left" valign="top">7.8 (1.2)</td></tr></tbody></table></table-wrap></sec><sec id="s3-2"><title>Primary Outcome</title><p>Participants in the present study using Microsoft Copilot achieved an overall accuracy averaging 68.2% (equally weighted across all tasks), while those using Google Web Search averaged 65.9%. 
Median length of participants&#x2019; written responses across all tasks in the Microsoft Copilot group was 158 characters (IQR 37-373; Min=7, Max=1558), whereas in the Google Web Search group median response length was 65 characters (IQR 16-104; Min=3, Max=1492), a median difference of 93 characters (95% CI 53-138).</p><p>In the Scenario 1 task (locating telehealth physiotherapy clinics), respondents randomly assigned to Microsoft Copilot correctly located an average of 1.47 telehealth services, just 0.07 (95% CI &#x2212;0.22 to 0.34) more than the 1.40 services found by those assigned Google Web Search (<xref ref-type="fig" rid="figure2">Figure 2</xref>).</p><p>In the first Scenario 2 task (locating nearby aged care providers), respondents assigned to Microsoft Copilot identified an average of 1.20 services, only 0.08 (95% CI &#x2212;0.27 to 0.42) services more than the 1.12 services identified by those assigned to Google Web Search. In the second Scenario 2 task (naming aged care application service), 84% (38/45) of respondents in the Microsoft Copilot group identified the correct government service compared with 58% (29/50) who did so in the Google Web Search group. This indicates Microsoft Copilot increased the proportion of correct responses for this question by 0.26 (95% CI 0.08-0.42) over Google Web Search (<xref ref-type="fig" rid="figure2">Figure 2</xref>).</p><p>In the first Scenario 3 task (listing mobility allowance eligibility criteria), Microsoft Copilot respondents correctly identified an average of 4.67 eligibility criteria compared to 3.43 criteria in the Google Web Search group (<xref ref-type="fig" rid="figure2">Figure 2</xref>). This represents a clear advantage in favor of Microsoft Copilot, with a mean difference of 1.24 (95% CI 0.49-1.91). 
In the second Scenario 3 task (providing mobility allowance helpline number), just 28 of the 44 respondents in the Microsoft Copilot group listed a correct phone number for the translation service, which was a lower proportion than the 38 of 48 respondents assigned Google Web Search (<xref ref-type="fig" rid="figure2">Figure 2</xref>). This is a mean difference in proportions of &#x2212;0.16 (95% CI &#x2212;0.33 to 0.03).</p><p>For the Scenario 4 task (locating a nearby mental health center), 53% (24/45) of respondents in the Microsoft Copilot group were completely correct and 3 of the 45 were partially correct. In contrast, 69% (34/49 respondents) in the Google Web Search group were completely correct and 3 of 49 were partially correct (<xref ref-type="fig" rid="figure2">Figure 2</xref>). Estimated odds ratio from the ordinal logistic regression analysis of 0.5 (95% CI 0.2-1.1) suggests that assignment to Microsoft Copilot may substantially reduce the odds of achieving a higher score relative to Google Web Search. However, the wide confidence interval indicates imprecision, with possible effects ranging from a large reduction through no difference to even a modest increase.</p><p>In the first Scenario 5 task (assessing patient travel subsidy eligibility), 86% (38/44 respondents) in the Microsoft Copilot group answered completely correctly, with an additional 3 (of the 44) providing partially correct answers (<xref ref-type="fig" rid="figure2">Figure 2</xref>). For the same question, 80% (39/49 respondents) in the Google Web Search group were completely correct, and 5 (of the 49) offered partially correct responses. Based on ordinal logistic regression, participants assigned to Microsoft Copilot had approximately 1.6 times the odds of achieving a more accurate answer compared to those in the Google Web Search group (odds ratio [OR] 1.6, 95% CI 0.5-5.2). However, the wide confidence interval ranges from a reduction in odds to a substantial increase. 
In the second Scenario 5 task (providing travel subsidy agency contact details), just 40% (18/45) of respondents in the Microsoft Copilot group listed correct contact details for the government agency, whereas exactly half of those in the Google Web Search group (24 of 48) did so (<xref ref-type="fig" rid="figure2">Figure 2</xref>). This is a difference in proportions of &#x2212;0.1 (95% CI &#x2212;0.29 to 0.10).</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Accuracy of responses to health care navigation tasks using Microsoft Copilot versus Google Web Search. (<bold>A</bold>) Bar charts indicating response accuracy ratings across eight navigation tasks. (<bold>B</bold>) Between-group comparisons, with associated 95% CI.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v4i1e76203_fig02.png"/></fig></sec><sec id="s3-3"><title>Secondary Outcome (Technology Rating Questionnaire)</title><p>On the 0&#x2010;10 TRQ subquestions (<xref ref-type="fig" rid="figure3">Figure 3</xref>), participants on average rated Microsoft Copilot as 0.87 points lower for willingness to adopt (95% CI &#x2212;1.65 to &#x2212;0.28) and 1.05 points lower for impact on quality of life (95% CI &#x2212;1.69 to &#x2212;0.42). 
These 95% CIs are compatible with meaningful differences favoring Google Web Search, though the precise effect sizes remain uncertain.</p><p>For perceived value (mean difference &#x2212;0.05, 95% CI &#x2212;0.57 to 0.49), help needed (mean difference 0.22, 95% CI &#x2212;0.89 to 1.33), and confidence (mean difference &#x2212;0.02, 95% CI &#x2212;0.43 to 0.32), the confidence intervals are all compatible with small effects in either direction or essentially no difference between the 2 tools.</p><p>Privacy concerns were 0.69 points higher for Microsoft Copilot (95% CI &#x2212;0.32 to 1.67), though the confidence interval includes the possibility of no difference or even slightly greater concerns about Google Web Search.</p><p>Finally, participants reported that Google Web Search required 1.01 points less effort to learn than Microsoft Copilot (95% CI &#x2212;1.69 to &#x2212;0.42), consistent with Google Web Search being perceived as somewhat easier to learn than Microsoft Copilot.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Combined bar chart and bubble chart showing mean score (bars) and counts (bubble area) for score ratings on the technology rating questionnaire for each assigned tool.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v4i1e76203_fig03.png"/></fig></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Summary of Findings</title><p>The present study compares the occurrence of correct information in responses to health navigation queries from users assigned to use either a generative AI search tool (powered by an LLM augmented with the retrieval of live web data) or a traditional search engine. Our primary results found those assigned to Microsoft Copilot outperformed the Google Web Search group for 2 tasks: naming the service to apply for an aged care assessment and assessing mobility allowance eligibility. 
There was no clear evidence of differences between the tools for the remaining 6 tasks. This suggests that while this generative AI search tool might be superior to a search engine in some specific instances, its overall advantage in terms of accuracy in health navigation tasks is not guaranteed.</p><p>We acknowledge that our selection of Microsoft Copilot as the generative AI search tool may limit generalizability to other current and future generative AI search tools. Microsoft Copilot was selected because it offered free access without the need for participants to create an account. However, the field of AI is evolving rapidly, with many newer and more advanced search-enabled models being released by OpenAI, DeepSeek, Anthropic, and others. Indeed, our initial testing of various models (<xref ref-type="fig" rid="figure1">Figure 1</xref>) indicates that many other generative AI search tools were substantially more accurate than Microsoft Copilot on the health navigation tasks used in the present study. Therefore, if participants had been assigned a more advanced generative AI search tool, it is possible they would have achieved higher accuracy compared to Google Web Search.</p><p>The growing integration of generative AI into conventional web search has blurred the lines between traditional search engines and AI-based search tools, potentially influencing the outcomes of the present study. In October 2024, just one month before recruitment commenced for the present study, Google rolled out AI Overviews across Australia, a generative AI feature that provides users with concise, AI-generated summaries within search results [<xref ref-type="bibr" rid="ref26">26</xref>]. However, these summaries would (at the time) only appear when users were logged in using their Google account and participants assigned to Google Web Search were instructed to open a private (or incognito) window to ensure they were not logged in. 
Despite this, participants who did not follow these instructions could potentially have encountered these AI-generated summaries. Participants were responding to a web-based survey, and therefore, it was not possible to track or record their exact search queries or which websites were visited. As generative AI becomes more embedded in everyday search experiences, traditional web searches could potentially become less common. This may make studying search engines in isolation less practically relevant and increase the need for ongoing research into the accuracy and reliability of generative AI search.</p><p>It is noteworthy that the Microsoft Copilot group achieved an overall accuracy of 68.2%, compared with 58.2% observed during preliminary testing conducted 2 months earlier, where scenarios were simply copy-pasted into Copilot 3 times. The higher accuracy observed might suggest that participants visited source websites to verify the information in the AI-generated summaries. It could also result from participants using more effective prompting strategies (than using the question verbatim), asking follow-up questions, or retrying prompts when initial responses were clearly incorrect. However, this is an incidental observation rather than a study objective. We cannot exclude sampling bias, given our preliminary estimate was based on just 3 attempts. In addition, improvements in Copilot&#x2019;s underlying AI model or updates to the accuracy of content available on the web between preliminary testing and study commencement could also have contributed to this difference.</p><p>Participants in our study skewed toward higher formal education and were generally quite confident in their ability to find health information on the internet. 
This may limit generalizability, as individuals with lower educational levels have lower self-rated ability to evaluate online health information and report lower overall trust in such information, compared to their more educated counterparts [<xref ref-type="bibr" rid="ref27">27</xref>]. Furthermore, some evidence suggests that generative AI tools tend to offer greater benefits for nonexperts [<xref ref-type="bibr" rid="ref28">28</xref>], though other evidence suggests no advantage in favor of ChatGPT (without web search capability) among lower-education users with lower proficiency using Google Web Search [<xref ref-type="bibr" rid="ref29">29</xref>]. It is possible that most participants in our study were able to effectively complete the task using either tool, potentially decreasing any between-tool differences. We did not design or power the study to test differences by education or eHealth literacy, so any differential effects remain a question for future work.</p><p>Health care navigation is a multifaceted process, depending not only on access to accurate information, but also consideration of the affordability of the health service, flexibility of service options, inclusivity of the clinical environment, and alignment with consumers&#x2019; personal beliefs and knowledge [<xref ref-type="bibr" rid="ref30">30</xref>]. Given this complexity, the scenario-based tasks used in this study likely do not fully capture the nuances of real-world health care navigation. Participants were presented with contrived scenarios in which they had no personal or emotional investment, only a single attempt to find the correct answer within a brief 5-minute window, with no opportunity to reassess or refine their responses, and scored on an artificial scoring metric. 
The scenarios were also deliberately designed to be challenging yet solvable, specifically to enable a meaningful comparison between the 2 tools, rather than to represent the most common real-world health navigation scenarios. Therefore, performance on these scenarios cannot be used to draw definitive conclusions about real-world health navigation ability. Future research exploring generative AI in authentic health care contexts should better account for these nuances and thus more accurately represent how individuals engage in health decision-making.</p><p>Secondary outcome results indicate that the willingness to adopt Google Web Search was higher than Microsoft Copilot. This contrasts with a previous study, which found that despite greater trust in an LLM (OpenAI&#x2019;s ChatGPT) for health information, participants rated intention to use the LLM similarly to Google Web Search [<xref ref-type="bibr" rid="ref31">31</xref>]. This difference may be due to familiarity with the specific LLM, as ChatGPT is more widely used than Copilot for health-related queries [<xref ref-type="bibr" rid="ref32">32</xref>], potentially leading to greater willingness to use. More broadly, users&#x2019; trust in health information and chatbots is shaped by factors such as usability, perceived risk, and credibility [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref34">34</xref>], all of which could differ between ChatGPT and Copilot. 
Furthermore, the difference in tasks between seeking health information and navigating health care services might influence users&#x2019; trust and adoption decisions, especially if there is a perception that LLM-based systems can struggle with more context-dependent or location-specific queries.</p><p>The present study found no clear evidence of a difference in perceived effort to use or help required between the generative AI search and conventional search, in concordance with existing research comparing ease of use of LLMs and search engines for health queries [<xref ref-type="bibr" rid="ref32">32</xref>]. Contrasting our findings, a recent study [<xref ref-type="bibr" rid="ref35">35</xref>] reported lower cognitive load among students researching socio-scientific issues about sunscreen with ChatGPT compared to Google. Participants may have greater concerns about the reliability and accuracy of AI-generated health information than other types of information, potentially limiting the advantages of AI-driven tools in a health context. In addition, it has been reported that users tend to prefer traditional search engines for fact-based information retrieval, turning to LLMs primarily when they need more personalized or lay-language explanations [<xref ref-type="bibr" rid="ref36">36</xref>]. Because the health navigation tasks in this study were relatively fact-based, the strengths of a generative AI search tool may not have been prominent.</p></sec><sec id="s4-2"><title>Strengths and Limitations</title><p>A key strength of this study is that it closely observed how real users interact with 2 distinct search tools, rather than merely comparing the static accuracy of the output of each tool. By allowing participants to formulate queries and responses freely, a more realistic picture of AI-assisted health care navigation was obtained. 
In addition, focusing on generative AI search&#x2014;rather than an LLM without the capability to augment responses with information from a web search&#x2014;better reflects the tools now emerging in consumer products. Our relatively large sample size of approximately 100 participants also provides a solid basis for analyzing differences between the 2 groups.</p><p>Despite these strengths, an important methodological limitation lies in our scoring approach, which looked only at the presence of correct information in responses and did not account for the presence of incorrect information. Not penalizing incorrect information means our scores may overestimate the accuracy of responses, since in real-world health navigation contexts, misinformation can delay care or impose additional burden on users. Although when compared to other health contexts (such as clinical advice), such risks may be somewhat less impactful for health navigation, as users may receive assistance when contacting incorrect government services, these risks may still be important for urgent health navigation tasks. Consequently, longer responses with multiple possible answers may have received inflated accuracy scores due to an increased chance of including correct items. This may have favored the Microsoft Copilot group, who, perhaps because they could easily copy the generated AI response, responded with much longer answers to the scenarios than the Google Web Search group. Similarly, our scoring metric may not have been able to capture the practical value of receiving partially helpful information, such as being directed to call a general help line (marked as incorrect in the scoring metric) that could have referred callers to the more appropriate service (which the metric marked as correct). 
We therefore highlight that our metric should be interpreted as measuring whether users can obtain correct information with these search tools, rather than whether they do so without adding incorrect information.</p><p>In addition to the scoring issues, several other methodological limitations stem from participant characteristics and study context. Participants in our sample were predominantly living in metropolitan areas and may have had limited direct experience with rural health care services, which may have affected how they interpreted the navigation tasks of the present study. Furthermore, we did not explore the extent to which participants were familiar with their assigned tool, and familiarity (or lack thereof) could have affected both the accuracy of the responses and how participants rated the technology. Overall, these methodological constraints mean that our findings should be interpreted cautiously, especially when extending them beyond the specific population, tasks, or tools studied here.</p><p>Future research should explore these findings further by conducting a follow-up study focused exclusively on rural participants to determine whether the observed patterns persist in different geographic contexts. In addition, future studies should prioritize methods that can more precisely capture user behavior, such as incorporating screen-tracking software or controlled environments, to yield more definitive insights. Another critical avenue for investigation is how users engage with lengthy LLM-generated responses, particularly how they prioritize or dismiss certain portions when seeking health-related information. Understanding this selection process could inform strategies to improve AI-generated content for critical decision-making. 
Finally, longitudinal research could help clarify whether repeated exposure to AI-generated content enhances users&#x2019; ability to critically assess multiple answers or, conversely, reinforces reliance on AI as an authoritative source.</p></sec><sec id="s4-3"><title>Conclusions</title><p>Although Microsoft Copilot demonstrated improved accuracy over Google Web Search on 2 of the tested scenarios, for all others, the accuracy estimates were compatible with no difference between the 2 tools. Participants also reported lower willingness to adopt Copilot, as well as higher privacy concerns and greater effort required to learn it. The results of this study underscore how generative AI tools still face hurdles in accuracy and user acceptance. Future work should investigate whether newer and more capable generative AI tools can consistently outperform conventional search engines across a broader range of real-world health navigation tasks.</p></sec></sec></body><back><notes><sec><title>Data Availability</title><p>The data sets generated during and analyzed in this study are not publicly available due to lack of ethical clearance to disclose data to third parties.</p></sec></notes><fn-group><fn fn-type="con"><p>Conceptualization: JS (lead), TR (equal)</p><p>Data curation: JS</p><p>Project administration: JS</p><p>Investigation: JS (lead), MHR (supporting)</p><p>Methodology: JS (lead), TR (supporting), MHR (supporting)</p><p>Formal analysis: JS (lead), MHR (supporting)</p><p>Software: JS</p><p>Resources: TR</p><p>Supervision: TR</p><p>Validation: MHR</p><p>Visualization: JS</p><p>Writing &#x2013; original draft: JS (lead), TR (supporting), MHR (supporting)</p><p>Writing &#x2013; review &#x0026; editing: JS (lead), TR (supporting), MHR (supporting)</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term 
id="abb2">eHEALS</term><def><p>eHealth Literacy Scale</p></def></def-item><def-item><term id="abb3">LLM</term><def><p>large language model</p></def></def-item><def-item><term id="abb4">TRQ</term><def><p>Technology Rating Questionnaire</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="report"><article-title>National health survey: health literacy</article-title><year>2018</year><access-date>2025-09-25</access-date><publisher-name>Australian Bureau of Statistics</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://www.abs.gov.au/statistics/health/health-conditions-and-risks/national-health-survey-health-literacy/2018">https://www.abs.gov.au/statistics/health/health-conditions-and-risks/national-health-survey-health-literacy/2018</ext-link></comment></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="report"><person-group person-group-type="author"><name name-style="western"><surname>Calder</surname><given-names>R</given-names> </name><name name-style="western"><surname>Dunkin</surname><given-names>R</given-names> </name><name name-style="western"><surname>Rochford</surname><given-names>C</given-names> </name><name name-style="western"><surname>Nichols</surname><given-names>T</given-names> </name></person-group><article-title>Australian health services: too complex to navigate. 
A review of the national reviews of Australia&#x2019;s health service arrangements</article-title><year>2019</year><publisher-name>Australian Health Policy Collaboration</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://www.vu.edu.au/sites/default/files/australian-health-services-too-complex-to-navigate.pdf">https://www.vu.edu.au/sites/default/files/australian-health-services-too-complex-to-navigate.pdf</ext-link></comment></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kavanagh</surname><given-names>BE</given-names> </name><name name-style="western"><surname>Corney</surname><given-names>KB</given-names> </name><name name-style="western"><surname>Beks</surname><given-names>H</given-names> </name><name name-style="western"><surname>Williams</surname><given-names>LJ</given-names> </name><name name-style="western"><surname>Quirk</surname><given-names>SE</given-names> </name><name name-style="western"><surname>Versace</surname><given-names>VL</given-names> </name></person-group><article-title>A scoping review of the barriers and facilitators to accessing and utilising mental health services across regional, rural, and remote Australia</article-title><source>BMC Health Serv Res</source><year>2023</year><month>10</month><day>4</day><volume>23</volume><issue>1</issue><fpage>1060</fpage><pub-id pub-id-type="doi">10.1186/s12913-023-10034-4</pub-id><pub-id pub-id-type="medline">37794469</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="report"><article-title>Rural and remote health</article-title><year>2024</year><access-date>2025-09-25</access-date><publisher-name>Australian Institute of Health and Welfare</publisher-name><comment><ext-link ext-link-type="uri" 
xlink:href="https://www.aihw.gov.au/reports/rural-remote-australians/rural-and-remote-health">https://www.aihw.gov.au/reports/rural-remote-australians/rural-and-remote-health</ext-link></comment></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="report"><person-group person-group-type="author"><collab>Nous Group</collab></person-group><article-title>Evidence base for additional investment in rural health in Australia</article-title><year>2023</year><access-date>2025-09-25</access-date><publisher-name>National Rural Health Alliance</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://www.ruralhealth.org.au/policy/position/evidence-base-for-additional-investment-in-rural-health-in-australia/">https://www.ruralhealth.org.au/policy/position/evidence-base-for-additional-investment-in-rural-health-in-australia/</ext-link></comment></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Manaf&#x00F2;</surname><given-names>E</given-names> </name><name name-style="western"><surname>Petermann</surname><given-names>L</given-names> </name><name name-style="western"><surname>Vandall-Walker</surname><given-names>V</given-names> </name><name name-style="western"><surname>Mason-Lai</surname><given-names>P</given-names> </name></person-group><article-title>Patient and public engagement in priority setting: a systematic rapid review of the literature</article-title><source>PLoS ONE</source><year>2018</year><volume>13</volume><issue>3</issue><fpage>e0193579</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0193579</pub-id><pub-id pub-id-type="medline">29499043</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Synnot</surname><given-names>A</given-names> </name><name 
name-style="western"><surname>Bragge</surname><given-names>P</given-names> </name><name name-style="western"><surname>Lowe</surname><given-names>D</given-names> </name><etal/></person-group><article-title>Research priorities in health communication and participation: international survey of consumers and other stakeholders</article-title><source>BMJ Open</source><year>2018</year><month>05</month><day>8</day><volume>8</volume><issue>5</issue><fpage>e019481</fpage><pub-id pub-id-type="doi">10.1136/bmjopen-2017-019481</pub-id><pub-id pub-id-type="medline">29739780</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lam</surname><given-names>MK</given-names> </name><name name-style="western"><surname>Lam</surname><given-names>LT</given-names> </name></person-group><article-title>Health information-seeking behaviour on the Internet and health literacy among older Australians</article-title><source>Electronic Journal of Health Informatics</source><year>2012</year><access-date>2025-09-25</access-date><volume>7</volume><issue>2</issue><comment><ext-link ext-link-type="uri" xlink:href="https://www.ssoar.info/ssoar/handle/document/92875">https://www.ssoar.info/ssoar/handle/document/92875</ext-link></comment></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rowlands</surname><given-names>IJ</given-names> </name><name name-style="western"><surname>Loxton</surname><given-names>D</given-names> </name><name name-style="western"><surname>Dobson</surname><given-names>A</given-names> </name><name name-style="western"><surname>Mishra</surname><given-names>GD</given-names> </name></person-group><article-title>Seeking health information online: association with young Australian women&#x2019;s physical, mental, and reproductive 
health</article-title><source>J Med Internet Res</source><year>2015</year><month>05</month><day>18</day><volume>17</volume><issue>5</issue><fpage>e120</fpage><pub-id pub-id-type="doi">10.2196/jmir.4048</pub-id><pub-id pub-id-type="medline">25986630</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wong</surname><given-names>C</given-names> </name><name name-style="western"><surname>Harrison</surname><given-names>C</given-names> </name><name name-style="western"><surname>Britt</surname><given-names>H</given-names> </name><name name-style="western"><surname>Henderson</surname><given-names>J</given-names> </name></person-group><article-title>Patient use of the internet for health information</article-title><source>Aust Fam Physician</source><year>2014</year><month>12</month><volume>43</volume><issue>12</issue><fpage>875</fpage><lpage>877</lpage><pub-id pub-id-type="medline">25705739</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zajac</surname><given-names>IT</given-names> </name><name name-style="western"><surname>Flight</surname><given-names>IHK</given-names> </name><name name-style="western"><surname>Wilson</surname><given-names>C</given-names> </name><name name-style="western"><surname>Turnbull</surname><given-names>D</given-names> </name><name name-style="western"><surname>Cole</surname><given-names>S</given-names> </name><name name-style="western"><surname>Young</surname><given-names>G</given-names> </name></person-group><article-title>Internet usage and openness to internet-delivered health information among Australian adults aged over 50 years</article-title><source>Australas Med J</source><year>2012</year><volume>5</volume><issue>5</issue><fpage>262</fpage><lpage>267</lpage><pub-id 
pub-id-type="doi">10.4066/AMJ.2012.1065</pub-id><pub-id pub-id-type="medline">22848321</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Quinn</surname><given-names>S</given-names> </name><name name-style="western"><surname>Bond</surname><given-names>R</given-names> </name><name name-style="western"><surname>Nugent</surname><given-names>C</given-names> </name></person-group><article-title>Quantifying health literacy and eHealth literacy using existing instruments and browser-based software for tracking online health information seeking behavior</article-title><source>Comput Human Behav</source><year>2017</year><month>04</month><volume>69</volume><fpage>256</fpage><lpage>267</lpage><pub-id pub-id-type="doi">10.1016/j.chb.2016.12.032</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ayre</surname><given-names>J</given-names> </name><name name-style="western"><surname>Cvejic</surname><given-names>E</given-names> </name><name name-style="western"><surname>McCaffery</surname><given-names>KJ</given-names> </name></person-group><article-title>Use of ChatGPT to obtain health information in Australia, 2024: insights from a nationally representative survey</article-title><source>Med J Aust</source><year>2025</year><month>03</month><day>3</day><volume>222</volume><issue>4</issue><fpage>210</fpage><lpage>212</lpage><pub-id pub-id-type="doi">10.5694/mja2.52598</pub-id><pub-id pub-id-type="medline">39901778</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nasra</surname><given-names>M</given-names> </name><name name-style="western"><surname>Jaffri</surname><given-names>R</given-names> </name><name 
name-style="western"><surname>Pavlin-Premrl</surname><given-names>D</given-names> </name><etal/></person-group><article-title>Can artificial intelligence improve patient educational material readability? A systematic review and narrative synthesis</article-title><source>Intern Med J</source><year>2025</year><month>01</month><volume>55</volume><issue>1</issue><fpage>20</fpage><lpage>34</lpage><pub-id pub-id-type="doi">10.1111/imj.16607</pub-id><pub-id pub-id-type="medline">39720869</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cohen</surname><given-names>SA</given-names> </name><name name-style="western"><surname>Brant</surname><given-names>A</given-names> </name><name name-style="western"><surname>Fisher</surname><given-names>AC</given-names> </name><name name-style="western"><surname>Pershing</surname><given-names>S</given-names> </name><name name-style="western"><surname>Do</surname><given-names>D</given-names> </name><name name-style="western"><surname>Pan</surname><given-names>C</given-names> </name></person-group><article-title>Dr. Google vs. Dr. 
ChatGPT: exploring the use of artificial intelligence in ophthalmology by comparing the accuracy, safety, and readability of responses to frequently asked patient questions regarding cataracts and cataract surgery</article-title><source>Semin Ophthalmol</source><year>2024</year><month>08</month><volume>39</volume><issue>6</issue><fpage>472</fpage><lpage>479</lpage><pub-id pub-id-type="doi">10.1080/08820538.2024.2326058</pub-id><pub-id pub-id-type="medline">38516983</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rao</surname><given-names>A</given-names> </name><name name-style="western"><surname>Mu</surname><given-names>A</given-names> </name><name name-style="western"><surname>Enichen</surname><given-names>E</given-names> </name><etal/></person-group><article-title>A future of self-directed patient internet research: large language model-based tools versus standard search engines</article-title><source>Ann Biomed Eng</source><year>2025</year><month>05</month><volume>53</volume><issue>5</issue><fpage>1199</fpage><lpage>1208</lpage><pub-id pub-id-type="doi">10.1007/s10439-025-03701-6</pub-id><pub-id pub-id-type="medline">40025252</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cohen</surname><given-names>SA</given-names> </name><name name-style="western"><surname>Yadlapalli</surname><given-names>N</given-names> </name><name name-style="western"><surname>Tijerina</surname><given-names>JD</given-names> </name><etal/></person-group><article-title>Comparing the ability of Google and ChatGPT to accurately respond to oculoplastics-related patient questions and generate customized oculoplastics patient education materials</article-title><source>Clin 
Ophthalmol</source><year>2024</year><volume>18</volume><fpage>2647</fpage><lpage>2655</lpage><pub-id pub-id-type="doi">10.2147/OPTH.S480222</pub-id><pub-id pub-id-type="medline">39323727</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chavda</surname><given-names>H</given-names> </name><name name-style="western"><surname>Sontam</surname><given-names>TR</given-names> </name><name name-style="western"><surname>Skinner</surname><given-names>WC</given-names> </name><name name-style="western"><surname>Ingall</surname><given-names>EM</given-names> </name><name name-style="western"><surname>Zide</surname><given-names>JR</given-names> </name></person-group><article-title>Comparison of responses from ChatGPT-4, Google Gemini, and Google Search to common patient questions about ankle sprains: a readability analysis</article-title><source>J Am Acad Orthop Surg</source><year>2025</year><month>07</month><day>3</day><volume>33</volume><issue>16</issue><fpage>924</fpage><lpage>930</lpage><pub-id pub-id-type="doi">10.5435/JAAOS-D-25-00260</pub-id><pub-id pub-id-type="medline">40627850</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pandya</surname><given-names>S</given-names> </name><name name-style="western"><surname>Alessandri Bonetti</surname><given-names>M</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>HY</given-names> </name><name name-style="western"><surname>Jeong</surname><given-names>T</given-names> </name><name name-style="western"><surname>Ziembicki</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Egro</surname><given-names>FM</given-names> </name></person-group><article-title>Burn patient education in the modern age: a comparative analysis of 
ChatGPT and Google performance answering common questions on burn injury and management</article-title><source>J Burn Care Res</source><year>2025</year><month>08</month><day>12</day><volume>46</volume><issue>3</issue><fpage>533</fpage><lpage>541</lpage><pub-id pub-id-type="doi">10.1093/jbcr/irae211</pub-id><pub-id pub-id-type="medline">39761346</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Mo</surname><given-names>F</given-names> </name><name name-style="western"><surname>Mao</surname><given-names>K</given-names> </name><name name-style="western"><surname>Zhao</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Qian</surname><given-names>H</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>H</given-names> </name><name name-style="western"><surname>Cheng</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>A survey of conversational search</article-title><source>arXiv</source><comment>Preprint posted online on Aug 5, 2025</comment><pub-id pub-id-type="doi">10.48550/arXiv.2410.15576</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>L</given-names> </name><name name-style="western"><surname>Meng</surname><given-names>J</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>Y</given-names> </name></person-group><article-title>LLM technologies and information search</article-title><source>Journal of Economy and Technology</source><year>2024</year><month>11</month><volume>2</volume><fpage>269</fpage><lpage>277</lpage><pub-id pub-id-type="doi">10.1016/j.ject.2024.08.007</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>B&#x00E9;lisle-Pipon</surname><given-names>JC</given-names> </name></person-group><article-title>Why we need to be careful with LLMs in medicine</article-title><source>Front Med (Lausanne)</source><year>2024</year><volume>11</volume><fpage>1495582</fpage><pub-id pub-id-type="doi">10.3389/fmed.2024.1495582</pub-id><pub-id pub-id-type="medline">39697212</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tian</surname><given-names>S</given-names> </name><name name-style="western"><surname>Jin</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Yeganova</surname><given-names>L</given-names> </name><etal/></person-group><article-title>Opportunities and challenges for ChatGPT and large language models in biomedicine and health</article-title><source>Brief Bioinformatics</source><year>2023</year><month>11</month><day>22</day><volume>25</volume><issue>1</issue><pub-id pub-id-type="doi">10.1093/bib/bbad493</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Norman</surname><given-names>CD</given-names> </name><name name-style="western"><surname>Skinner</surname><given-names>HA</given-names> </name></person-group><article-title>eHEALS: the eHealth literacy scale</article-title><source>J Med Internet Res</source><year>2006</year><month>11</month><day>14</day><volume>8</volume><issue>4</issue><fpage>e27</fpage><pub-id pub-id-type="doi">10.2196/jmir.8.4.e27</pub-id><pub-id pub-id-type="medline">17213046</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Berkowsky</surname><given-names>RW</given-names> </name><name name-style="western"><surname>Sharit</surname><given-names>J</given-names> </name><name name-style="western"><surname>Czaja</surname><given-names>SJ</given-names> </name></person-group><article-title>Factors predicting decisions about technology adoption among older adults</article-title><source>Innovation in Aging</source><year>2017</year><month>11</month><day>1</day><volume>1</volume><issue>3</issue><pub-id pub-id-type="doi">10.1093/geroni/igy002</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="web"><article-title>Introducing AI overviews in Australia, a new generative AI experience on search</article-title><source>Google Australia Blog</source><access-date>2025-09-20</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://blog.google/intl/en-au/company-news/outreach-initiatives/ai-overviews-australia">https://blog.google/intl/en-au/company-news/outreach-initiatives/ai-overviews-australia</ext-link></comment></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Diviani</surname><given-names>N</given-names> </name><name name-style="western"><surname>van den Putte</surname><given-names>B</given-names> </name><name name-style="western"><surname>Giani</surname><given-names>S</given-names> </name><name name-style="western"><surname>van Weert</surname><given-names>JC</given-names> </name></person-group><article-title>Low health literacy and evaluation of online health information: a systematic review of the literature</article-title><source>J Med Internet Res</source><year>2015</year><month>05</month><day>7</day><volume>17</volume><issue>5</issue><fpage>e112</fpage><pub-id pub-id-type="doi">10.2196/jmir.4018</pub-id><pub-id pub-id-type="medline">25953147</pub-id></nlm-citation></ref><ref 
id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Noy</surname><given-names>S</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>W</given-names> </name></person-group><article-title>Experimental evidence on the productivity effects of generative artificial intelligence</article-title><source>Science</source><year>2023</year><month>07</month><day>14</day><volume>381</volume><issue>6654</issue><fpage>187</fpage><lpage>192</lpage><pub-id pub-id-type="doi">10.1126/science.adh2586</pub-id><pub-id pub-id-type="medline">37440646</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Xu</surname><given-names>R</given-names> </name><name name-style="western"><surname>Feng</surname><given-names>Y (Katherine)</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>H</given-names> </name></person-group><article-title>ChatGPT vs. 
Google: a comparative study of search performance and user experience</article-title><source>SSRN Journal</source><pub-id pub-id-type="doi">10.2139/ssrn.4498671</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Robards</surname><given-names>F</given-names> </name><name name-style="western"><surname>Kang</surname><given-names>M</given-names> </name><name name-style="western"><surname>Steinbeck</surname><given-names>K</given-names> </name><etal/></person-group><article-title>Health care equity and access for marginalised young people: a longitudinal qualitative study exploring health system navigation in Australia</article-title><source>Int J Equity Health</source><year>2019</year><month>03</month><day>4</day><volume>18</volume><issue>1</issue><fpage>41</fpage><pub-id pub-id-type="doi">10.1186/s12939-019-0941-2</pub-id><pub-id pub-id-type="medline">30832651</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Sun</surname><given-names>X</given-names> </name><name name-style="western"><surname>Ma</surname><given-names>R</given-names> </name><name name-style="western"><surname>Zhao</surname><given-names>X</given-names> </name><name name-style="western"><surname>Li</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Lindqvist</surname><given-names>J</given-names> </name><name name-style="western"><surname>El Ali</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Trusting the search: unraveling human trust in health information from Google and ChatGPT</article-title><source>arXiv</source><comment>Preprint posted online on Mar 15, 2024</comment><pub-id pub-id-type="doi">10.48550/arXiv.2403.09987</pub-id></nlm-citation></ref><ref 
id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Moise</surname><given-names>A</given-names> </name><name name-style="western"><surname>Centomo-Bozzo</surname><given-names>A</given-names> </name><name name-style="western"><surname>Orishchak</surname><given-names>O</given-names> </name><name name-style="western"><surname>Alnoury</surname><given-names>MK</given-names> </name><name name-style="western"><surname>Daniel</surname><given-names>SJ</given-names> </name></person-group><article-title>Can ChatGPT replace an otolaryngologist in guiding parents on tonsillectomy?</article-title><source>Ear Nose Throat J</source><year>2024</year><month>04</month><day>2</day><fpage>1455613241230841</fpage><pub-id pub-id-type="doi">10.1177/01455613241230841</pub-id><pub-id pub-id-type="medline">38563440</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Alagarsamy</surname><given-names>S</given-names> </name><name name-style="western"><surname>Mehrolia</surname><given-names>S</given-names> </name></person-group><article-title>Exploring chatbot trust: antecedents and behavioural outcomes</article-title><source>Heliyon</source><year>2023</year><month>05</month><volume>9</volume><issue>5</issue><fpage>e16074</fpage><pub-id pub-id-type="doi">10.1016/j.heliyon.2023.e16074</pub-id><pub-id pub-id-type="medline">37206046</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sbaffi</surname><given-names>L</given-names> </name><name name-style="western"><surname>Rowley</surname><given-names>J</given-names> </name></person-group><article-title>Trust and credibility in web-based health information: a review and agenda for future 
research</article-title><source>J Med Internet Res</source><year>2017</year><month>06</month><day>19</day><volume>19</volume><issue>6</issue><fpage>e218</fpage><pub-id pub-id-type="doi">10.2196/jmir.7579</pub-id><pub-id pub-id-type="medline">28630033</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Stadler</surname><given-names>M</given-names> </name><name name-style="western"><surname>Bannert</surname><given-names>M</given-names> </name><name name-style="western"><surname>Sailer</surname><given-names>M</given-names> </name></person-group><article-title>Cognitive ease at a cost: LLMs reduce mental effort but compromise depth in student scientific inquiry</article-title><source>Comput Human Behav</source><year>2024</year><month>11</month><volume>160</volume><fpage>108386</fpage><pub-id pub-id-type="doi">10.1016/j.chb.2024.108386</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Caramancion</surname><given-names>KM</given-names> </name></person-group><article-title>Large language models vs. search engines: evaluating user preferences across varied information retrieval scenarios</article-title><source>arXiv</source><comment>Preprint posted online on Jan 11, 2024</comment><pub-id pub-id-type="doi">10.48550/arXiv.2401.05761</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Text of all scenarios and associated tasks.</p><media xlink:href="ai_v4i1e76203_app1.xlsx" xlink:title="XLSX File, 11 KB"/></supplementary-material></app-group></back></article>