@Article{info:doi/10.2196/65567, author="Kauttonen, Janne and Rousi, Rebekah and Alam{\"a}ki, Ari", title="Trust and Acceptance Challenges in the Adoption of AI Applications in Health Care: Quantitative Survey Analysis", journal="J Med Internet Res", year="2025", month="Mar", day="21", volume="27", pages="e65567", keywords="artificial intelligence", keywords="AI", keywords="health care technology", keywords="technology adoption", keywords="predictive modeling", keywords="user trust", keywords="user acceptance", abstract="Background: Artificial intelligence (AI) has potential to transform health care, but its successful implementation depends on the trust and acceptance of consumers and patients. Understanding the factors that influence attitudes toward AI is crucial for effective adoption. Despite AI's growing integration into health care, consumer and patient acceptance remains a critical challenge. Research has largely focused on applications or attitudes, lacking a comprehensive analysis of how factors, such as demographics, personality traits, technology attitudes, and AI knowledge, affect and interact across different health care AI contexts. Objective: We aimed to investigate people's trust in and acceptance of AI across health care use cases and determine how context and perceived risk affect individuals' propensity to trust and accept AI in specific health care scenarios. Methods: We collected and analyzed web-based survey data from 1100 Finnish participants, presenting them with 8 AI use cases in health care: 5 (62\%) noninvasive applications (eg, activity monitoring and mental health support) and 3 (38\%) physical interventions (eg, AI-controlled robotic surgery). Respondents evaluated intention to use, trust, and willingness to trade off personal data for these use cases. Gradient boosted tree regression models were trained to predict responses based on 33 demographic-, personality-, and technology-related variables. To interpret the results of our predictive models, we used the Shapley additive explanations method, a game theory--based approach for explaining the output of machine learning models. It quantifies the contribution of each feature to individual predictions, allowing us to determine the relative importance of various demographic-, personality-, and technology-related factors and their interactions in shaping participants' trust in and acceptance of AI in health care. Results: Consumer attitudes toward technology, technology use, and personality traits were the primary drivers of trust and intention to use AI in health care. Use cases were ranked by acceptance, with noninvasive monitors being the most preferred. However, the specific use case had less impact in general than expected. Nonlinear dependencies were observed, including an inverted U-shaped pattern in positivity toward AI based on self-reported AI knowledge. Certain personality traits, such as being more disorganized and careless, were associated with more positive attitudes toward AI in health care. Women seemed more cautious about AI applications in health care than men. Conclusions: The findings highlight the complex interplay of factors influencing trust and acceptance of AI in health care. Consumer trust and intention to use AI in health care are driven by technology attitudes and use rather than specific use cases. AI service providers should consider demographic factors, personality traits, and technology attitudes when designing and implementing AI systems in health care. 
The study demonstrates the potential of using predictive AI models as decision-making tools for implementing and interacting with clients in health care AI applications. ", doi="10.2196/65567", url="https://www.jmir.org/2025/1/e65567" } @Article{info:doi/10.2196/55277, author="Lau, Jerry and Bisht, Shivani and Horton, Robert and Crisan, Annamaria and Jones, John and Gantotti, Sandeep and Hermes-DeSantis, Evelyn", title="Creation of Scientific Response Documents for Addressing Product Medical Information Inquiries: Mixed Method Approach Using Artificial Intelligence", journal="JMIR AI", year="2025", month="Mar", day="13", volume="4", pages="e55277", keywords="AI", keywords="LLM", keywords="GPT", keywords="biopharmaceutical", keywords="medical information", keywords="content generation", keywords="artificial intelligence", keywords="pharmaceutical", keywords="scientific response", keywords="documentation", keywords="information", keywords="clinical data", keywords="strategy", keywords="reference", keywords="feasibility", keywords="development", keywords="machine learning", keywords="large language model", keywords="accuracy", keywords="context", keywords="traceability", keywords="accountability", keywords="survey", keywords="scientific response documentation", keywords="SRD", keywords="benefit", keywords="content generator", keywords="content analysis", keywords="Generative Pre-trained Transformer", abstract="Background: Pharmaceutical manufacturers address health care professionals' information needs through scientific response documents (SRDs), offering evidence-based answers to medication and disease state questions. Medical information departments, staffed by medical experts, develop SRDs that provide concise summaries consisting of relevant background information, search strategies, clinical data, and balanced references. With an escalating demand for SRDs and the increasing complexity of therapies, medical information departments are exploring advanced technologies and artificial intelligence (AI) tools like large language models (LLMs) to streamline content development. While AI and LLMs show promise in generating draft responses, a synergistic approach combining an LLM with traditional machine learning classifiers in a series of human-supervised and -curated steps could help address limitations, including hallucinations. This will ensure accuracy, context, traceability, and accountability in the development of the concise clinical data summaries of an SRD. Objective: This study aims to quantify the challenges of SRD development and develop a framework exploring the feasibility and value addition of integrating AI capabilities in the process of creating concise summaries for an SRD. Methods: To measure the challenges in SRD development, a survey was conducted by phactMI, a nonprofit consortium of medical information leaders in the pharmaceutical industry, assessing aspects of SRD creation among its member companies. The survey collected data on the time and tediousness of various activities related to SRD development. Another working group, consisting of medical information professionals and data scientists, used AI to aid SRD authoring, focusing on data extraction and abstraction. They used logistic regression on semantic embedding features to train classification models and transformer-based summarization pipelines to generate concise summaries. Results: Of the 33 companies surveyed, 64\% (21/33) opened the survey, and 76\% (16/21) of those responded. 
On average, medical information departments generate 614 new documents and update 1352 documents each year. Respondents considered paraphrasing scientific articles to be the most tedious and time-intensive task. In the project's second phase, sentence classification models showed the ability to accurately distinguish target categories with receiver operating characteristic scores ranging from 0.67 to 0.85 (all P<.001), allowing for accurate data extraction. For data abstraction, the comparison of the bilingual evaluation understudy (BLEU) score and semantic similarity in the paraphrased texts yielded different results among reviewers, with each preferring different trade-offs between these metrics. Conclusions: This study establishes a framework for integrating LLM and machine learning into SRD development, supported by a pharmaceutical company survey emphasizing the challenges of paraphrasing content. While machine learning models show potential for section identification and content usability assessment in data extraction and abstraction, further optimization and research are essential before full-scale industry implementation. The working group's insights guide an AI-driven content analysis; address limitations; and advance efficient, precise, and responsive frameworks to assist with pharmaceutical SRD development. ", doi="10.2196/55277", url="https://ai.jmir.org/2025/1/e55277" } @Article{info:doi/10.2196/66760, author="Guo, Weiqi and Chen, Yang", title="Investigating Whether AI Will Replace Human Physicians and Understanding the Interplay of the Source of Consultation, Health-Related Stigma, and Explanations of Diagnoses on Patients' Evaluations of Medical Consultations: Randomized Factorial Experiment", journal="J Med Internet Res", year="2025", month="Mar", day="5", volume="27", pages="e66760", keywords="artificial intelligence", keywords="AI", keywords="medical artificial intelligence", keywords="medical AI", keywords="human--artificial intelligence interaction", keywords="human-AI interaction", keywords="medical consultation", keywords="health-related stigma", keywords="diagnosis explanation", keywords="health communication", abstract="Background: The increasing use of artificial intelligence (AI) in medical diagnosis and consultation promises benefits such as greater accuracy and efficiency. However, there is little evidence to systematically test whether the ideal technological promises translate into an improved evaluation of the medical consultation from the patient's perspective. This perspective is significant because AI as a technological solution does not necessarily improve patient confidence in diagnosis and adherence to treatment at the functional level, create meaningful interactions between the medical agent and the patient at the relational level, evoke positive emotions, or reduce the patient's pessimism at the emotional level. Objective: This study aims to investigate, from a patient-centered perspective, whether AI or human-involved AI can replace the role of human physicians in diagnosis at the functional, relational, and emotional levels as well as how some health-related differences between human-AI and human-human interactions affect patients' evaluations of the medical consultation. Methods: A 3 (consultation source: AI vs human-involved AI vs human) {\texttimes} 2 (health-related stigma: low vs high) {\texttimes} 2 (diagnosis explanation: without vs with explanation) factorial experiment was conducted with 249 participants. 
The main effects and interaction effects of the variables were examined on individuals' functional, relational, and emotional evaluations of the medical consultation. Results: Functionally, people trusted the diagnosis of the human physician (mean 4.78-4.85, SD 0.06-0.07) more than medical AI (mean 4.34-4.55, SD 0.06-0.07) or human-involved AI (mean 4.39-4.56, SD 0.06-0.07; P<.001), but at the relational and emotional levels, there was no significant difference between human-AI and human-human interactions (P>.05). Health-related stigma had no significant effect on how people evaluated the medical consultation or contributed to preferring AI-powered systems over humans (P>.05); however, providing explanations of the diagnosis significantly improved the functional (P<.001), relational (P<.05), and emotional (P<.05) evaluations of the consultation for all 3 medical agents. Conclusions: The findings imply that at the current stage of AI development, people trust human expertise more than accurate AI, especially for decisions traditionally made by humans, such as medical diagnosis, supporting the algorithm aversion theory. Surprisingly, even for highly stigmatized diseases such as AIDS, where we assume anonymity and privacy are preferred in medical consultations, the dehumanization of AI does not contribute significantly to the preference for AI-powered medical agents over humans, suggesting that instrumental needs of diagnosis override patient privacy concerns. Furthermore, explaining the diagnosis effectively improves treatment adherence, strengthens the physician-patient relationship, and fosters positive emotions during the consultation. This provides insights for the design of AI medical agents, which have long been criticized for lacking transparency while making highly consequential decisions. This study concludes by outlining theoretical contributions to research on health communication and human-AI interaction and discusses the implications for the design and application of medical AI. ", doi="10.2196/66760", url="https://www.jmir.org/2025/1/e66760", url="http://www.ncbi.nlm.nih.gov/pubmed/40053785" } @Article{info:doi/10.2196/53892, author="Cabral, Pereira Bernardo and Braga, Maciel Luiza Amara and Conte Filho, Gilbert Carlos and Penteado, Bruno and Freire de Castro Silva, Luis Sandro and Castro, Leonardo and Fornazin, Marcelo and Mota, Fabio", title="Future Use of AI in Diagnostic Medicine: 2-Wave Cross-Sectional Survey Study", journal="J Med Internet Res", year="2025", month="Feb", day="27", volume="27", pages="e53892", keywords="artificial intelligence", keywords="AI", keywords="diagnostic medicine", keywords="survey research", keywords="researcher opinion", keywords="future", abstract="Background: The rapid evolution of artificial intelligence (AI) presents transformative potential for diagnostic medicine, offering opportunities to enhance diagnostic accuracy, reduce costs, and improve patient outcomes. Objective: This study aimed to assess the expected future impact of AI on diagnostic medicine by comparing global researchers' expectations using 2 cross-sectional surveys. Methods: The surveys were conducted in September 2020 and February 2023. Each survey captured a 10-year projection horizon, gathering insights from >3700 researchers with expertise in AI and diagnostic medicine from all over the world. The survey sought to understand the perceived benefits, integration challenges, and evolving attitudes toward AI use in diagnostic settings. 
Results: Results indicated a strong expectation among researchers that AI will substantially influence diagnostic medicine within the next decade. Key anticipated benefits include enhanced diagnostic reliability, reduced screening costs, improved patient care, and decreased physician workload, addressing the growing demand for diagnostic services outpacing the supply of medical professionals. Specifically, x-ray diagnosis, heart rhythm interpretation, and skin malignancy detection were identified as the diagnostic tools most likely to be integrated with AI technologies due to their maturity and existing AI applications. The surveys highlighted the growing optimism regarding AI's ability to transform traditional diagnostic pathways and enhance clinical decision-making processes. Furthermore, the study identified barriers to the integration of AI in diagnostic medicine. The primary challenges cited were the difficulties of embedding AI within existing clinical workflows, ethical and regulatory concerns, and data privacy issues. Respondents emphasized uncertainties around legal responsibility and accountability for AI-supported clinical decisions, data protection challenges, and the need for robust regulatory frameworks to ensure safe AI deployment. Ethical concerns, particularly those related to algorithmic transparency and bias, were noted as increasingly critical, reflecting a heightened awareness of the potential risks associated with AI adoption in clinical settings. Differences between the 2 survey waves indicated a growing focus on ethical and regulatory issues, suggesting an evolving recognition of these challenges over time. Conclusions: Despite these barriers, there was notable consistency in researchers' expectations across the 2 survey periods, indicating a stable and sustained outlook on AI's transformative potential in diagnostic medicine. The findings show the need for interdisciplinary collaboration among clinicians, AI developers, and regulators to address ethical and practical challenges while maximizing AI's benefits. This study offers insights into the projected trajectory of AI in diagnostic medicine, guiding stakeholders, including health care providers, policy makers, and technology developers, on navigating the opportunities and challenges of AI integration. ", doi="10.2196/53892", url="https://www.jmir.org/2025/1/e53892", url="http://www.ncbi.nlm.nih.gov/pubmed/40053779" } @Article{info:doi/10.2196/68347, author="Hadar-Shoval, Dorit and Lvovsky, Maya and Asraf, Kfir and Shimoni, Yoav and Elyoseph, Zohar", title="The Feasibility of Large Language Models in Verbal Comprehension Assessment: Mixed Methods Feasibility Study", journal="JMIR Form Res", year="2025", month="Feb", day="24", volume="9", pages="e68347", keywords="large language models", keywords="verbal comprehension assessment", keywords="artificial intelligence", keywords="AI in psychodiagnostics", keywords="personalized intelligence tests", keywords="verbal comprehension index", keywords="Wechsler Adult Intelligence Scale", keywords="WAIS-III", keywords="psychological test validity", keywords="ethics in computerized cognitive assessment", abstract="Background: Cognitive assessment is an important component of applied psychology, but limited access and high costs make these evaluations challenging. 
Objective: This study aimed to examine the feasibility of using large language models (LLMs) to create personalized artificial intelligence--based verbal comprehension tests (AI-BVCTs) for assessing verbal intelligence, in contrast with traditional assessment methods based on standardized norms. Methods: We used a within-participants design, comparing scores obtained from AI-BVCTs with those from the Wechsler Adult Intelligence Scale (WAIS-III) verbal comprehension index (VCI). In total, 8 Hebrew-speaking participants completed both the VCI and AI-BVCT, the latter being generated using the LLM Claude. Results: The concordance correlation coefficient (CCC) demonstrated strong agreement between AI-BVCT and VCI scores (Claude: CCC=.75, 90\% CI 0.266-0.933; GPT-4: CCC=.73, 90\% CI 0.170-0.935). Pearson correlations further supported these findings, showing strong associations between VCI and AI-BVCT scores (Claude: r=.84, P<.001; GPT-4: r=.77, P=.02). No statistically significant differences were found between AI-BVCT and VCI scores (P>.05). Conclusions: These findings support the potential of LLMs to assess verbal intelligence. The study attests to the promise of AI-based cognitive tests in increasing the accessibility and affordability of assessment processes, enabling personalized testing. The research also raises ethical concerns regarding privacy and overreliance on AI in clinical work. Further research with larger and more diverse samples is needed to establish the validity and reliability of this approach and develop more accurate scoring procedures. ", doi="10.2196/68347", url="https://formative.jmir.org/2025/1/e68347" } @Article{info:doi/10.2196/50708, author="Rinderknecht, Fatuma-Ayaan and Yang, B. Vivian and Tilahun, Mekaleya and Lester, C. Jenna", title="Perspectives of Black, Latinx, Indigenous, and Asian Communities on Health Data Use and AI: Cross-Sectional Survey Study", journal="J Med Internet Res", year="2025", month="Feb", day="21", volume="27", pages="e50708", keywords="augmented intelligence", keywords="artificial intelligence", keywords="health equity", keywords="dermatology", keywords="Black", keywords="Latinx", keywords="Indigenous", keywords="Asian", keywords="racial and ethnic minority communities", keywords="AI", keywords="health care", keywords="health data", keywords="survey", keywords="racism", keywords="large language model", keywords="LLM", keywords="diversity", doi="10.2196/50708", url="https://www.jmir.org/2025/1/e50708" } @Article{info:doi/10.2196/65565, author="Owoyemi, Ayomide and Osuchukwu, Joanne and Salwei, E. Megan and Boyd, Andrew", title="Checklist Approach to Developing and Implementing AI in Clinical Settings: Instrument Development Study", journal="JMIRx Med", year="2025", month="Feb", day="20", volume="6", pages="e65565", keywords="artificial intelligence", keywords="machine learning", keywords="algorithm", keywords="model", keywords="analytics", keywords="AI deployment", keywords="human-AI interaction", keywords="AI integration", keywords="checklist", keywords="clinical workflow", keywords="clinical setting", keywords="literature review", abstract="Background: The integration of artificial intelligence (AI) in health care settings demands a nuanced approach that considers both technical performance and sociotechnical factors. Objective: This study aimed to develop a checklist that addresses the sociotechnical aspects of AI deployment in health care and provides a structured, holistic guide for teams involved in the life cycle of AI systems. 
Methods: A literature synthesis identified 20 relevant studies, forming the foundation for the Clinical AI Sociotechnical Framework checklist. A modified Delphi study was then conducted with 35 global health care professionals. Participants assessed the checklist's relevance across 4 stages: ``Planning,'' ``Design,'' ``Development,'' and ``Proposed Implementation.'' A consensus threshold of 80\% was established for each item. IQRs and Cronbach $\alpha$ were calculated to assess agreement and reliability. Results: The initial checklist had 45 questions. Following participant feedback, the checklist was refined to 34 items, and a final round saw 100\% consensus on all items (mean score >0.8, IQR 0). Based on the outcome of the Delphi study, a final checklist was outlined, with 1 more question added to make 35 questions in total. Conclusions: The Clinical AI Sociotechnical Framework checklist provides a comprehensive, structured approach to developing and implementing AI in clinical settings, addressing technical and social factors critical for adoption and success. This checklist is a practical tool that aligns AI development with real-world clinical needs, aiming to enhance patient outcomes and integrate smoothly into health care workflows. ", doi="10.2196/65565", url="https://xmed.jmir.org/2025/1/e65565" } @Article{info:doi/10.2196/65699, author="King, C. Abby and Doueiri, N. Zakaria and Kaulberg, Ankita and Goldman Rosas, Lisa", title="The Promise and Perils of Artificial Intelligence in Advancing Participatory Science and Health Equity in Public Health", journal="JMIR Public Health Surveill", year="2025", month="Feb", day="14", volume="11", pages="e65699", keywords="digital health", keywords="artificial intelligence", keywords="community-based participatory research", keywords="citizen science", keywords="health equity", keywords="societal trends", keywords="public health", keywords="viewpoint", keywords="policy makers", keywords="public participation", keywords="information technology", keywords="micro-level data", keywords="macro-level data", keywords="LLM", keywords="natural language processing", keywords="machine learning", keywords="language model", keywords="Our Voice", doi="10.2196/65699", url="https://publichealth.jmir.org/2025/1/e65699" } @Article{info:doi/10.2196/60847, author="Choudhury, Ananya and Volmer, Leroy and Martin, Frank and Fijten, Rianne and Wee, Leonard and Dekker, Andre and Soest, van Johan", title="Advancing Privacy-Preserving Health Care Analytics and Implementation of the Personal Health Train: Federated Deep Learning Study", journal="JMIR AI", year="2025", month="Feb", day="6", volume="4", pages="e60847", keywords="gross tumor volume segmentation", keywords="federated learning infrastructure", keywords="privacy-preserving technology", keywords="cancer", keywords="deep learning", keywords="artificial intelligence", keywords="lung cancer", keywords="oncology", keywords="radiotherapy", keywords="imaging", keywords="data protection", keywords="data privacy", abstract="Background: The rapid advancement of deep learning in health care presents significant opportunities for automating complex medical tasks and improving clinical workflows. However, widespread adoption is impeded by data privacy concerns and the necessity for large, diverse datasets across multiple institutions. Federated learning (FL) has emerged as a viable solution, enabling collaborative artificial intelligence model development without sharing individual patient data. 
To effectively implement FL in health care, robust and secure infrastructures are essential. Developing such federated deep learning frameworks is crucial to harnessing the full potential of artificial intelligence while ensuring patient data privacy and regulatory compliance. Objective: The objective is to introduce an innovative FL infrastructure called the Personal Health Train (PHT) that includes the procedural, technical, and governance components needed to implement FL on real-world health care data, including training deep learning neural networks. The study aims to apply this federated deep learning infrastructure to the use case of gross tumor volume segmentation on chest computed tomography images of patients with lung cancer and present the results from a proof-of-concept experiment. Methods: The PHT framework addresses the challenges of data privacy when sharing data, by keeping data close to the source and instead bringing the analysis to the data. Technologically, PHT requires 3 interdependent components: ``tracks'' (protected communication channels), ``trains'' (containerized software apps), and ``stations'' (institutional data repositories), which are supported by the open source ``Vantage6'' software. The study applies this federated deep learning infrastructure to the use case of gross tumor volume segmentation on chest computed tomography images of patients with lung cancer, with the introduction of an additional component called the secure aggregation server, where the model averaging is done in a trusted and inaccessible environment. Results: We demonstrated the feasibility of executing deep learning algorithms in a federated manner using PHT and presented the results from a proof-of-concept study. The infrastructure linked 12 hospitals across 8 nations, covering 4 continents, demonstrating the scalability and global reach of the proposed approach. During the execution and training of the deep learning algorithm, no data were shared outside the hospital. Conclusions: The findings of the proof-of-concept study, as well as the implications and limitations of the infrastructure and the results, are discussed. The application of federated deep learning to unstructured medical imaging data, facilitated by the PHT framework and Vantage6 platform, represents a significant advancement in the field. The proposed infrastructure addresses the challenges of data privacy and enables collaborative model development, paving the way for the widespread adoption of deep learning--based tools in the medical domain and beyond. The introduction of the secure aggregation server implied that data leakage problems in FL can be prevented by careful design decisions of the infrastructure. Trial Registration: ClinicalTrials.gov NCT05775068; https://clinicaltrials.gov/study/NCT05775068 ", doi="10.2196/60847", url="https://ai.jmir.org/2025/1/e60847" } @Article{info:doi/10.2196/67485, author="Jacob, Christine and Brasier, No{\'e} and Laurenzi, Emanuele and Heuss, Sabina and Mougiakakou, Stavroula-Georgia and C{\"o}ltekin, Arzu and Peter, K. 
Marc", title="AI for IMPACTS Framework for Evaluating the Long-Term Real-World Impacts of AI-Powered Clinician Tools: Systematic Review and Narrative Synthesis", journal="J Med Internet Res", year="2025", month="Feb", day="5", volume="27", pages="e67485", keywords="eHealth", keywords="assessment", keywords="adoption", keywords="implementation", keywords="artificial intelligence", keywords="clinician", keywords="efficiency", keywords="health technology assessment", keywords="clinical practice", abstract="Background: Artificial intelligence (AI) has the potential to revolutionize health care by enhancing both clinical outcomes and operational efficiency. However, its clinical adoption has been slower than anticipated, largely due to the absence of comprehensive evaluation frameworks. Existing frameworks remain insufficient and tend to emphasize technical metrics such as accuracy and validation, while overlooking critical real-world factors such as clinical impact, integration, and economic sustainability. This narrow focus prevents AI tools from being effectively implemented, limiting their broader impact and long-term viability in clinical practice. Objective: This study aimed to create a framework for assessing AI in health care, extending beyond technical metrics to incorporate social and organizational dimensions. The framework was developed by systematically reviewing, analyzing, and synthesizing the evaluation criteria necessary for successful implementation, focusing on the long-term real-world impact of AI in clinical practice. Methods: A search was performed in July 2024 across the PubMed, Cochrane, Scopus, and IEEE Xplore databases to identify relevant studies published in English between January 2019 and mid-July 2024, yielding 3528 results, among which 44 studies met the inclusion criteria. The systematic review followed PRISMA (Preferred Reporting Items for Systematic reviews and Meta-Analyses) guidelines and the Cochrane Handbook for Systematic Reviews. Data were analyzed using NVivo through thematic analysis and narrative synthesis to identify key emergent themes in the studies. Results: By synthesizing the included studies, we developed a framework that goes beyond the traditional focus on technical metrics or study-level methodologies. It integrates clinical context and real-world implementation factors, offering a more comprehensive approach to evaluating AI tools. With our focus on assessing the long-term real-world impact of AI technologies in health care, we named the framework AI for IMPACTS. The criteria are organized into seven key clusters, each corresponding to a letter in the acronym: (1) I---integration, interoperability, and workflow; (2) M---monitoring, governance, and accountability; (3) P---performance and quality metrics; (4) A---acceptability, trust, and training; (5) C---cost and economic evaluation; (6) T---technological safety and transparency; and (7) S---scalability and impact. These are further broken down into 28 specific subcriteria. Conclusions: The AI for IMPACTS framework offers a holistic approach to evaluate the long-term real-world impact of AI tools in the heterogeneous and challenging health care context and lays the groundwork for further validation through expert consensus and testing of the framework in real-world health care settings. It is important to emphasize that multidisciplinary expertise is essential for assessment, yet many assessors lack the necessary training. 
In addition, traditional evaluation methods struggle to keep pace with AI's rapid development. To ensure successful AI integration, flexible, fast-tracked assessment processes and proper assessor training are needed to maintain rigorous standards while adapting to AI's dynamic evolution. Trial Registration: reviewregistry1859; https://tinyurl.com/ysn2d7sh ", doi="10.2196/67485", url="https://www.jmir.org/2025/1/e67485" } @Article{info:doi/10.2196/58161, author="Gazquez-Garcia, Javier and S{\'a}nchez-Bocanegra, Luis Carlos and Sevillano, Luis Jose", title="AI in the Health Sector: Systematic Review of Key Skills for Future Health Professionals", journal="JMIR Med Educ", year="2025", month="Feb", day="5", volume="11", pages="e58161", keywords="artificial intelligence", keywords="healthcare competencies", keywords="systematic review", keywords="healthcare education", keywords="AI regulation", abstract="Background: Technological advancements have significantly reshaped health care, introducing digital solutions that enhance diagnostics and patient care. Artificial intelligence (AI) stands out, offering unprecedented capabilities in data analysis, diagnostic support, and personalized medicine. However, effectively integrating AI into health care necessitates specialized competencies among professionals, an area still in its infancy in terms of comprehensive literature and formalized training programs. Objective: This systematic review aims to consolidate the essential skills and knowledge health care professionals need to integrate AI into their clinical practice effectively, according to the published literature. Methods: We conducted a systematic review, across databases PubMed, Scopus, and Web of Science, of peer-reviewed literature that directly explored the required skills for health care professionals to integrate AI into their practice, published in English or Spanish from 2018 onward. Studies that did not refer to specific skills or training in digital health were not included, discarding those that did not directly contribute to understanding the competencies necessary to integrate AI into health care practice. Bias in the examined works was evaluated following Cochrane's domain-based recommendations. Results: The initial database search yielded a total of 2457 articles. After deleting duplicates and screening titles and abstracts, 37 articles were selected for full-text review. Out of these, only 7 met all the inclusion criteria for this systematic review. The review identified a diverse range of skills and competencies, that we categorized into 14 key areas classified based on their frequency of appearance in the selected studies, including AI fundamentals, data analytics and management, and ethical considerations. Conclusions: Despite the broadening of search criteria to capture the evolving nature of AI in health care, the review underscores a significant gap in focused studies on the required competencies. Moreover, the review highlights the critical role of regulatory bodies such as the US Food and Drug Administration in facilitating the adoption of AI technologies by establishing trust and standardizing algorithms. 
Key areas were identified for developing competencies among health care professionals for the implementation of AI, including: AI fundamentals knowledge (more focused on assessing the accuracy, reliability, and validity of AI algorithms than on more technical abilities such as programming or mathematics), data analysis skills (including data acquisition, cleaning, visualization, management, and governance), and ethical and legal considerations. In an AI-enhanced health care landscape, the ability to humanize patient care through effective communication is paramount. This balance ensures that while AI streamlines tasks and potentially increases patient interaction time, health care professionals maintain a focus on compassionate care, thereby leveraging AI to enhance, rather than detract from, the patient experience.\emspace ", doi="10.2196/58161", url="https://mededu.jmir.org/2025/1/e58161" } @Article{info:doi/10.2196/51785, author="Werder, Karl and Cao, Lan and Park, Hee Eun and Ramesh, Balasubramaniam", title="Why AI Monitoring Faces Resistance and What Healthcare Organizations Can Do About It: An Emotion-Based Perspective", journal="J Med Internet Res", year="2025", month="Jan", day="31", volume="27", pages="e51785", keywords="artificial intelligence", keywords="AI monitoring", keywords="emotion", keywords="resistance", keywords="health care", doi="10.2196/51785", url="https://www.jmir.org/2025/1/e51785" } @Article{info:doi/10.2196/67878, author="Eysenbach, Gunther", title="Crisis Text Line and Loris.ai Controversy Highlights the Complexity of Informed Consent on the Internet and Data-Sharing Ethics for Machine Learning and Research", journal="J Med Internet Res", year="2025", month="Jan", day="22", volume="27", pages="e67878", keywords="data ethics", keywords="data sharing", keywords="informed consent", keywords="disclosure", keywords="conflict of interest", keywords="transparency", keywords="trust", doi="10.2196/67878", url="https://www.jmir.org/2025/1/e67878" } @Article{info:doi/10.2196/68198, author="Bazzano, N. Alessandra and Mantsios, Andrea and Mattei, Nicholas and Kosorok, R. 
Michael and Culotta, Aron", title="AI Can Be a Powerful Social Innovation for Public Health if Community Engagement Is at the Core", journal="J Med Internet Res", year="2025", month="Jan", day="22", volume="27", pages="e68198", keywords="Artificial Intelligence", keywords="Generative Artificial Intelligence", keywords="Citizen Science", keywords="Community Participation", keywords="Innovation Diffusion", doi="10.2196/68198", url="https://www.jmir.org/2025/1/e68198", url="http://www.ncbi.nlm.nih.gov/pubmed/39841529" } @Article{info:doi/10.2196/60269, author="Sasseville, Maxime and Ouellet, Steven and Rh{\'e}aume, Caroline and Sahlia, Malek and Couture, Vincent and Despr{\'e}s, Philippe and Paquette, Jean-S{\'e}bastien and Darmon, David and Bergeron, Fr{\'e}d{\'e}ric and Gagnon, Marie-Pierre", title="Bias Mitigation in Primary Health Care Artificial Intelligence Models: Scoping Review", journal="J Med Internet Res", year="2025", month="Jan", day="7", volume="27", pages="e60269", keywords="artificial intelligence", keywords="AI", keywords="algorithms", keywords="expert system", keywords="decision support", keywords="bias", keywords="community health services", keywords="primary health care", keywords="health disparities", keywords="social equity", keywords="scoping review", abstract="Background: Artificial intelligence (AI) predictive models in primary health care have the potential to enhance population health by rapidly and accurately identifying individuals who should receive care and health services. However, these models also carry the risk of perpetuating or amplifying existing biases toward diverse groups. We identified a gap in the current understanding of strategies used to assess and mitigate bias in primary health care algorithms related to individuals' personal or protected attributes. Objective: This study aimed to describe the attempts, strategies, and methods used to mitigate bias in AI models within primary health care, to identify the diverse groups or protected attributes considered, and to evaluate the results of these approaches on both bias reduction and AI model performance. Methods: We conducted a scoping review following Joanna Briggs Institute (JBI) guidelines, searching Medline (Ovid), CINAHL (EBSCO), PsycINFO (Ovid), and Web of Science databases for studies published between January 1, 2017, and November 15, 2022. Pairs of reviewers independently screened titles and abstracts, applied selection criteria, and performed full-text screening. Discrepancies regarding study inclusion were resolved by consensus. Following reporting standards for AI in health care, we extracted data on study objectives, model features, targeted diverse groups, mitigation strategies used, and results. Using the mixed methods appraisal tool, we appraised the quality of the studies. Results: After removing 585 duplicates, we screened 1018 titles and abstracts. From the remaining 189 full-text articles, we included 17 studies. The most frequently investigated protected attributes were race (or ethnicity), examined in 12 of the 17 studies, and sex (often identified as gender), typically classified as ``male versus female'' in 10 of the studies. We categorized bias mitigation approaches into four clusters: (1) modifying existing AI models or datasets, (2) sourcing data from electronic health records, (3) developing tools with a ``human-in-the-loop'' approach, and (4) identifying ethical principles for informed decision-making. 
Algorithmic preprocessing methods, such as relabeling and reweighing data, along with natural language processing techniques that extract data from unstructured notes, showed the greatest potential for bias mitigation. Other methods aimed at enhancing model fairness included group recalibration and the application of the equalized odds metric. However, these approaches sometimes exacerbated prediction errors across groups or led to overall model miscalibrations. Conclusions: The results suggest that biases toward diverse groups are more easily mitigated when data are open-sourced, multiple stakeholders are engaged, and during the algorithm's preprocessing stage. Further empirical studies that include a broader range of groups, such as Indigenous peoples in Canada, are needed to validate and expand upon these findings. Trial Registration: OSF Registry osf.io/9ngz5/; https://osf.io/9ngz5/ International Registered Report Identifier (IRRID): RR2-10.2196/46684 ", doi="10.2196/60269", url="https://www.jmir.org/2025/1/e60269" } @Article{info:doi/10.2196/58275, author="O'Malley, Andrew and Veenhuizen, Miriam and Ahmed, Ayla", title="Ensuring Appropriate Representation in Artificial Intelligence--Generated Medical Imagery: Protocol for a Methodological Approach to Address Skin Tone Bias", journal="JMIR AI", year="2024", month="Nov", day="27", volume="3", pages="e58275", keywords="artificial intelligence", keywords="generative AI", keywords="AI images", keywords="dermatology", keywords="anatomy", keywords="medical education", keywords="medical imaging", keywords="skin", keywords="skin tone", keywords="United States", keywords="educational material", keywords="psoriasis", keywords="digital imagery", abstract="Background: In medical education, particularly in anatomy and dermatology, generative artificial intelligence (AI) can be used to create customized illustrations. However, the underrepresentation of darker skin tones in medical textbooks and elsewhere, which serve as training data for AI, poses a significant challenge in ensuring diverse and inclusive educational materials. Objective: This study aims to evaluate the extent of skin tone diversity in AI-generated medical images and to test whether the representation of skin tones can be improved by modifying AI prompts to better reflect the demographic makeup of the US population. Methods: In total, 2 standard AI models (Dall-E [OpenAI] and Midjourney [Midjourney Inc]) each generated 100 images of people with psoriasis. In addition, a custom model was developed that incorporated a prompt injection aimed at ``forcing'' the AI (Dall-E 3) to reflect the skin tone distribution of the US population according to the 2012 American National Election Survey. This custom model generated another set of 100 images. The skin tones in these images were assessed by 3 researchers using the New Immigrant Survey skin tone scale, with the median value representing each image. A chi-square goodness of fit analysis compared the skin tone distributions from each set of images to that of the US population. Results: The standard AI models (Dalle-3 and Midjourney) demonstrated a significant difference between the expected skin tones of the US population and the observed tones in the generated images (P<.001). Both standard AI models overrepresented lighter skin. Conversely, the custom model with the modified prompt yielded a distribution of skin tones that closely matched the expected demographic representation, showing no significant difference (P=.04). 
Conclusions: This study reveals a notable bias in AI-generated medical images, predominantly underrepresenting darker skin tones. This bias can be effectively addressed by modifying AI prompts to incorporate real-life demographic distributions. The findings emphasize the need for conscious efforts in AI development to ensure diverse and representative outputs, particularly in educational and medical contexts. Users of generative AI tools should be aware that these biases exist, and that similar tendencies may also exist in other types of generative AI (eg, large language models) and in other characteristics (eg, sex, gender, culture, and ethnicity). Injecting demographic data into AI prompts may effectively counteract these biases, ensuring a more accurate representation of the general population. ", doi="10.2196/58275", url="https://ai.jmir.org/2024/1/e58275" } @Article{info:doi/10.2196/63445, author="Ralevski, Alexandra and Taiyab, Nadaa and Nossal, Michael and Mico, Lindsay and Piekos, Samantha and Hadlock, Jennifer", title="Using Large Language Models to Abstract Complex Social Determinants of Health From Original and Deidentified Medical Notes: Development and Validation Study", journal="J Med Internet Res", year="2024", month="Nov", day="19", volume="26", pages="e63445", keywords="housing instability", keywords="housing insecurity", keywords="housing", keywords="machine learning", keywords="artificial intelligence", keywords="AI", keywords="large language model", keywords="LLM", keywords="natural language processing", keywords="NLP", keywords="electronic health record", keywords="EHR", keywords="electronic medical record", keywords="EMR", keywords="social determinants of health", keywords="exposome", keywords="pregnancy", keywords="obstetric", keywords="deidentification", abstract="Background: Social determinants of health (SDoH) such as housing insecurity are known to be intricately linked to patients' health status. More efficient methods for abstracting structured data on SDoH can help accelerate the inclusion of exposome variables in biomedical research and support health care systems in identifying patients who could benefit from proactive outreach. Large language models (LLMs) developed from Generative Pre-trained Transformers (GPTs) have shown potential for performing complex abstraction tasks on unstructured clinical notes. Objective: Here, we assess the performance of GPTs on identifying temporal aspects of housing insecurity and compare results between both original and deidentified notes. Methods: We compared the ability of GPT-3.5 and GPT-4 to identify instances of both current and past housing instability, as well as general housing status, from 25,217 notes from 795 pregnant women. Results were compared with manual abstraction, a named entity recognition model, and regular expressions. Results: Compared with GPT-3.5 and the named entity recognition model, GPT-4 had the highest performance and had a much higher recall (0.924) than human abstractors (0.702) in identifying patients experiencing current or past housing instability, although precision was lower (0.850) compared with human abstractors (0.971). GPT-4's precision improved slightly (0.936 original, 0.939 deidentified) on deidentified versions of the same notes, while recall dropped (0.781 original, 0.704 deidentified). 
Conclusions: This work demonstrates that while manual abstraction is likely to yield slightly more accurate results overall, LLMs can provide a scalable, cost-effective solution with the advantage of greater recall. This could support semiautomated abstraction, but given the potential risk for harm, human review would be essential before using results for any patient engagement or care decisions. Furthermore, recall was lower when notes were deidentified prior to LLM abstraction. ", doi="10.2196/63445", url="https://www.jmir.org/2024/1/e63445" } @Article{info:doi/10.2196/53616, author="Chustecki, Margaret", title="Benefits and Risks of AI in Health Care: Narrative Review", journal="Interact J Med Res", year="2024", month="Nov", day="18", volume="13", pages="e53616", keywords="artificial intelligence", keywords="safety risks", keywords="biases", keywords="AI", keywords="benefit", keywords="risk", keywords="health care", keywords="safety", keywords="ethics", keywords="transparency", keywords="data privacy", keywords="accuracy", abstract="Background: The integration of artificial intelligence (AI) into health care has the potential to transform the industry, but it also raises ethical, regulatory, and safety concerns. This review paper provides an in-depth examination of the benefits and risks associated with AI in health care, with a focus on issues like biases, transparency, data privacy, and safety. Objective: This study aims to evaluate the advantages and drawbacks of incorporating AI in health care. This assessment centers on the potential biases in AI algorithms, transparency challenges, data privacy issues, and safety risks in health care settings. Methods: Studies included in this review were selected based on their relevance to AI applications in health care, focusing on ethical, regulatory, and safety considerations. Inclusion criteria encompassed peer-reviewed articles, reviews, and relevant research papers published in English. Exclusion criteria included non--peer-reviewed articles, editorials, and studies not directly related to AI in health care. A comprehensive literature search was conducted across 8 databases: OVID MEDLINE, OVID Embase, OVID PsycINFO, EBSCO CINAHL Plus with Full Text, ProQuest Sociological Abstracts, ProQuest Philosopher's Index, ProQuest Advanced Technologies \& Aerospace, and Wiley Cochrane Library. The search was last updated on June 23, 2023. Results were synthesized using qualitative methods to identify key themes and findings related to the benefits and risks of AI in health care. Results: The literature search yielded 8796 articles. After removing duplicates and applying the inclusion and exclusion criteria, 44 studies were included in the qualitative synthesis. This review highlights the significant promise that AI holds in health care, such as enhancing health care delivery by providing more accurate diagnoses, personalized treatment plans, and efficient resource allocation. However, persistent concerns remain, including biases ingrained in AI algorithms, a lack of transparency in decision-making, potential compromises of patient data privacy, and safety risks associated with AI implementation in clinical settings. Conclusions: In conclusion, while AI presents the opportunity for a health care revolution, it is imperative to address the ethical, regulatory, and safety challenges linked to its integration. 
Proactive measures are required to ensure that AI technologies are developed and deployed responsibly, striking a balance between innovation and the safeguarding of patient well-being. ", doi="10.2196/53616", url="https://www.i-jmr.org/2024/1/e53616" } @Article{info:doi/10.2196/63356, author="Abbasgholizadeh Rahimi, Samira and Shrivastava, Richa and Brown-Johnson, Anita and Caidor, Pascale and Davies, Claire and Idrissi Janati, Amal and Kengne Talla, Pascaline and Madathil, Sreenath and Willie, M. Bettina and Emami, Elham", title="EDAI Framework for Integrating Equity, Diversity, and Inclusion Throughout the Lifecycle of AI to Improve Health and Oral Health Care: Qualitative Study", journal="J Med Internet Res", year="2024", month="Nov", day="15", volume="26", pages="e63356", keywords="equity, diversity, and inclusion", keywords="EDI", keywords="health care", keywords="oral health care", keywords="machine learning", keywords="artificial intelligence", keywords="AI", abstract="Background: Recent studies have identified significant gaps in equity, diversity, and inclusion (EDI) considerations within the lifecycle of artificial intelligence (AI), spanning from data collection and problem definition to implementation stages. Despite the recognized need for integrating EDI principles, there is currently no existing guideline or framework to support this integration in the AI lifecycle. Objective: This study aimed to address this gap by identifying EDI principles and indicators to be integrated into the AI lifecycle. The goal was to develop a comprehensive guiding framework to guide the development and implementation of future AI systems. Methods: This study was conducted in 3 phases. In phase 1, a comprehensive systematic scoping review explored how EDI principles have been integrated into AI in health and oral health care settings. In phase 2, a multidisciplinary team was established, and two 2-day, in-person international workshops with over 60 representatives from diverse backgrounds, expertise, and communities were conducted. The workshops included plenary presentations, round table discussions, and focused group discussions. In phase 3, based on the workshops' insights, the EDAI framework was developed and refined through iterative feedback from participants. The results of the initial systematic scoping review have been published separately, and this paper focuses on subsequent phases of the project, which is related to framework development. Results: In this study, we developed the EDAI framework, a comprehensive guideline that integrates EDI principles and indicators throughout the entire AI lifecycle. This framework addresses existing gaps at various stages, from data collection to implementation, and focuses on individual, organizational, and systemic levels. Additionally, we identified both the facilitators and barriers to integrating EDI within the AI lifecycle in health and oral health care. Conclusions: The developed EDAI framework provides a comprehensive, actionable guideline for integrating EDI principles into AI development and deployment. By facilitating the systematic incorporation of these principles, the framework supports the creation and implementation of AI systems that are not only technologically advanced but also sensitive to EDI principles. 
", doi="10.2196/63356", url="https://www.jmir.org/2024/1/e63356", url="http://www.ncbi.nlm.nih.gov/pubmed/39546793" } @Article{info:doi/10.2196/22769, author="Wang, Leyao and Wan, Zhiyu and Ni, Congning and Song, Qingyuan and Li, Yang and Clayton, Ellen and Malin, Bradley and Yin, Zhijun", title="Applications and Concerns of ChatGPT and Other Conversational Large Language Models in Health Care: Systematic Review", journal="J Med Internet Res", year="2024", month="Nov", day="7", volume="26", pages="e22769", keywords="large language model", keywords="ChatGPT", keywords="artificial intelligence", keywords="natural language processing", keywords="health care", keywords="summarization", keywords="medical knowledge inquiry", keywords="reliability", keywords="bias", keywords="privacy", abstract="Background: The launch of ChatGPT (OpenAI) in November 2022 attracted public attention and academic interest to large language models (LLMs), facilitating the emergence of many other innovative LLMs. These LLMs have been applied in various fields, including health care. Numerous studies have since been conducted regarding how to use state-of-the-art LLMs in health-related scenarios. Objective: This review aims to summarize applications of and concerns regarding conversational LLMs in health care and provide an agenda for future research in this field. Methods: We used PubMed, ACM, and the IEEE digital libraries as primary sources for this review. We followed the guidance of PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses) to screen and select peer-reviewed research articles that (1) were related to health care applications and conversational LLMs and (2) were published before September 1, 2023, the date when we started paper collection. We investigated these papers and classified them according to their applications and concerns. Results: Our search initially identified 820 papers according to targeted keywords, out of which 65 (7.9\%) papers met our criteria and were included in the review. The most popular conversational LLM was ChatGPT (60/65, 92\% of papers), followed by Bard (Google LLC; 1/65, 2\% of papers), LLaMA (Meta; 1/65, 2\% of papers), and other LLMs (6/65, 9\% papers). These papers were classified into four categories of applications: (1) summarization, (2) medical knowledge inquiry, (3) prediction (eg, diagnosis, treatment recommendation, and drug synergy), and (4) administration (eg, documentation and information collection), and four categories of concerns: (1) reliability (eg, training data quality, accuracy, interpretability, and consistency in responses), (2) bias, (3) privacy, and (4) public acceptability. There were 49 (75\%) papers using LLMs for either summarization or medical knowledge inquiry, or both, and there are 58 (89\%) papers expressing concerns about either reliability or bias, or both. We found that conversational LLMs exhibited promising results in summarization and providing general medical knowledge to patients with a relatively high accuracy. However, conversational LLMs such as ChatGPT are not always able to provide reliable answers to complex health-related tasks (eg, diagnosis) that require specialized domain expertise. While bias or privacy issues are often noted as concerns, no experiments in our reviewed papers thoughtfully examined how conversational LLMs lead to these issues in health care research. 
Conclusions: Future studies should focus on improving the reliability of LLM applications in complex health-related tasks, as well as investigating the mechanisms by which LLM applications introduce bias and privacy issues. Considering the vast accessibility of LLMs, legal, social, and technical efforts are all needed to address concerns about LLMs and to promote, improve, and regulate their application in health care. ", doi="10.2196/22769", url="https://www.jmir.org/2024/1/e22769" } @Article{info:doi/10.2196/53207, author="Rosenbacke, Rikard and Melhus, {\AA}sa and McKee, Martin and Stuckler, David", title="How Explainable Artificial Intelligence Can Increase or Decrease Clinicians' Trust in AI Applications in Health Care: Systematic Review", journal="JMIR AI", year="2024", month="Oct", day="30", volume="3", pages="e53207", keywords="explainable artificial intelligence", keywords="XAI", keywords="trustworthy AI", keywords="clinician trust", keywords="affect-based measures", keywords="cognitive measures", keywords="clinical use", keywords="clinical decision-making", keywords="clinical informatics", abstract="Background: Artificial intelligence (AI) has significant potential in clinical practice. However, its ``black box'' nature can lead clinicians to question its value. The challenge is to create sufficient trust for clinicians to feel comfortable using AI, but not so much that they defer to it even when it produces results that conflict with their clinical judgment in ways that lead to incorrect decisions. Explainable AI (XAI) aims to address this by providing explanations of how AI algorithms reach their conclusions. However, it remains unclear whether such explanations foster an appropriate degree of trust to ensure the optimal use of AI in clinical practice. Objective: This study aims to systematically review and synthesize empirical evidence on the impact of XAI on clinicians' trust in AI-driven clinical decision-making. Methods: A systematic review was conducted in accordance with PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses) guidelines, searching PubMed and Web of Science databases. Studies were included if they empirically measured the impact of XAI on clinicians' trust using cognition- or affect-based measures. Out of 778 articles screened, 10 met the inclusion criteria. We assessed the risk of bias using standard tools appropriate to the methodology of each paper. Results: The risk of bias in all papers was moderate or moderate to high. All included studies operationalized trust primarily through cognitive-based definitions, with 2 also incorporating affect-based measures. Out of these, 5 studies reported that XAI increased clinicians' trust compared with standard AI, particularly when the explanations were clear, concise, and relevant to clinical practice. In addition, 3 studies found no significant effect of XAI on trust, indicating that the presence of explanations does not automatically improve trust. Notably, 2 studies highlighted that XAI could either enhance or diminish trust, depending on the complexity and coherence of the provided explanations. The majority of studies suggest that XAI has the potential to enhance clinicians' trust in recommendations generated by AI. However, complex or contradictory explanations can undermine this trust. More critically, trust in AI is not inherently beneficial, as AI recommendations are not infallible. 
These findings underscore the nuanced role of explanation quality and suggest that trust can be modulated through the careful design of XAI systems. Conclusions: Excessive trust in incorrect advice generated by AI can adversely impact clinical accuracy, just as can happen when correct advice is distrusted. Future research should focus on refining both cognitive and affect-based measures of trust and on developing strategies to achieve an appropriate balance in terms of trust, preventing both blind trust and undue skepticism. Optimizing trust in AI systems is essential for their effective integration into clinical practice. ", doi="10.2196/53207", url="https://ai.jmir.org/2024/1/e53207" } @Article{info:doi/10.2196/62678, author="Ball Dunlap, A. Patricia and Michalowski, Martin", title="Advancing AI Data Ethics in Nursing: Future Directions for Nursing Practice, Research, and Education", journal="JMIR Nursing", year="2024", month="Oct", day="25", volume="7", pages="e62678", keywords="artificial intelligence", keywords="AI data ethics", keywords="data-centric AI", keywords="nurses", keywords="nursing informatics", keywords="machine learning", keywords="data literacy", keywords="health care AI", keywords="responsible AI", doi="10.2196/62678", url="https://nursing.jmir.org/2024/1/e62678" } @Article{info:doi/10.2196/58011, author="Elyoseph, Zohar and Gur, Tamar and Haber, Yuval and Simon, Tomer and Angert, Tal and Navon, Yuval and Tal, Amir and Asman, Oren", title="An Ethical Perspective on the Democratization of Mental Health With Generative AI", journal="JMIR Ment Health", year="2024", month="Oct", day="17", volume="11", pages="e58011", keywords="ethics", keywords="generative artificial intelligence", keywords="generative AI", keywords="mental health", keywords="ChatGPT", keywords="large language model", keywords="LLM", keywords="digital mental health", keywords="machine learning", keywords="AI", keywords="technology", keywords="accessibility", keywords="knowledge", keywords="GenAI", doi="10.2196/58011", url="https://mental.jmir.org/2024/1/e58011" } @Article{info:doi/10.2196/53505, author="Germani, Federico and Spitale, Giovanni and Biller-Andorno, Nikola", title="The Dual Nature of AI in Information Dissemination: Ethical Considerations", journal="JMIR AI", year="2024", month="Oct", day="15", volume="3", pages="e53505", keywords="AI", keywords="bioethics", keywords="infodemic management", keywords="disinformation", keywords="artificial intelligence", keywords="ethics", keywords="ethical", keywords="infodemic", keywords="infodemics", keywords="public health", keywords="misinformation", keywords="information dissemination", keywords="information literacy", doi="10.2196/53505", url="https://ai.jmir.org/2024/1/e53505", url="http://www.ncbi.nlm.nih.gov/pubmed/39405099" } @Article{info:doi/10.2196/58493, author="Tavory, Tamar", title="Regulating AI in Mental Health: Ethics of Care Perspective", journal="JMIR Ment Health", year="2024", month="Sep", day="19", volume="11", pages="e58493", keywords="artificial intelligence", keywords="ethics of care", keywords="regulation", keywords="legal", keywords="relationship", keywords="mental health", keywords="mental healthcare", keywords="AI", keywords="ethic", keywords="ethics", keywords="ethical", keywords="regulations", keywords="law", keywords="framework", keywords="frameworks", keywords="regulatory", keywords="relationships", keywords="chatbot", keywords="chatbots", keywords="conversational agent", keywords="conversational agents", keywords="European 
Artificial Intelligence Act", doi="10.2196/58493", url="https://mental.jmir.org/2024/1/e58493" } @Article{info:doi/10.2196/49795, author="Lorenzini, Giorgia and Arbelaez Ossa, Laura and Milford, Stephen and Elger, Simone Bernice and Shaw, Martin David and De Clercq, Eva", title="The ``Magical Theory'' of AI in Medicine: Thematic Narrative Analysis", journal="JMIR AI", year="2024", month="Aug", day="19", volume="3", pages="e49795", keywords="artificial intelligence", keywords="medicine", keywords="physicians", keywords="hype", keywords="narratives", keywords="qualitative research", abstract="Background: The discourse surrounding medical artificial intelligence (AI) often focuses on narratives that either hype the technology's potential or predict dystopian futures. AI narratives have a significant influence on the direction of research, funding, and public opinion and thus shape the future of medicine. Objective: The paper aims to offer critical reflections on AI narratives, with a specific focus on medical AI, and to raise awareness of how people working with medical AI talk about AI and discharge their ``narrative responsibility.'' Methods: Qualitative semistructured interviews were conducted with 41 participants from different disciplines who were exposed to medical AI in their profession. The research represents a secondary analysis of data using a thematic narrative approach. The analysis resulted in 2 main themes, each with 2 subthemes. Results: Stories about the AI-physician interaction depicted either a competitive or collaborative relationship. Some participants argued that AI might replace physicians, as it performs better than physicians. However, others believed that physicians should not be replaced and that AI should rather assist and support physicians. The idea of excessive technological deferral and automation bias was discussed, highlighting the risk of ``losing'' decisional power. The possibility that AI could relieve physicians from burnout and allow them to spend more time with patients was also considered. Finally, a few participants reported an extremely optimistic account of medical AI, while the majority criticized this type of story. The latter lamented the existence of a ``magical theory'' of medical AI, identified with techno-solutionist positions. Conclusions: Most of the participants reported a nuanced view of technology, recognizing both its benefits and challenges and avoiding polarized narratives. However, some participants did contribute to the hype surrounding medical AI, comparing it to human capabilities and depicting it as superior. Overall, the majority agreed that medical AI should assist rather than replace clinicians. The study concludes that a balanced narrative (that focuses on the technology's present capabilities and limitations) is necessary to fully realize the potential of medical AI while avoiding unrealistic expectations and hype. 
", doi="10.2196/49795", url="https://ai.jmir.org/2024/1/e49795", url="http://www.ncbi.nlm.nih.gov/pubmed/39158953" } @Article{info:doi/10.2196/46871, author="Han, Yu and Ceross, Aaron and Bergmann, Jeroen", title="Regulatory Frameworks for AI-Enabled Medical Device Software in China: Comparative Analysis and Review of Implications for Global Manufacturer", journal="JMIR AI", year="2024", month="Jul", day="29", volume="3", pages="e46871", keywords="NMPA", keywords="medical device software", keywords="device registration", keywords="registration pathway", keywords="artificial intelligence", keywords="machine learning", keywords="medical device", keywords="device development", keywords="China", keywords="regulations", keywords="medical software", doi="10.2196/46871", url="https://ai.jmir.org/2024/1/e46871", url="http://www.ncbi.nlm.nih.gov/pubmed/39073860" } @Article{info:doi/10.2196/55957, author="Bragazzi, Luigi Nicola and Garbarino, Sergio", title="Toward Clinical Generative AI: Conceptual Framework", journal="JMIR AI", year="2024", month="Jun", day="7", volume="3", pages="e55957", keywords="clinical intelligence", keywords="artificial intelligence", keywords="iterative process", keywords="abduction", keywords="benchmarking", keywords="verification paradigms", doi="10.2196/55957", url="https://ai.jmir.org/2024/1/e55957", url="http://www.ncbi.nlm.nih.gov/pubmed/38875592" } @Article{info:doi/10.2196/54501, author="Jordan, Alexis and Park, Albert", title="Understanding the Long Haulers of COVID-19: Mixed Methods Analysis of YouTube Content", journal="JMIR AI", year="2024", month="Jun", day="3", volume="3", pages="e54501", keywords="long haulers", keywords="post--COVID-19 condition", keywords="COVID-19", keywords="YouTube", keywords="topic modeling", keywords="natural language processing", abstract="Background: The COVID-19 pandemic had a devastating global impact. In the United States, there were >98 million COVID-19 cases and >1 million resulting deaths. One consequence of COVID-19 infection has been post--COVID-19 condition (PCC). People with this syndrome, colloquially called long haulers, experience symptoms that impact their quality of life. The root cause of PCC and effective treatments remains unknown. Many long haulers have turned to social media for support and guidance. Objective: In this study, we sought to gain a better understanding of the long hauler experience by investigating what has been discussed and how information about long haulers is perceived on social media. We specifically investigated the following: (1) the range of symptoms that are discussed, (2) the ways in which information about long haulers is perceived, (3) informational and emotional support that is available to long haulers, and (4) discourse between viewers and creators. We selected YouTube as our data source due to its popularity and wide range of audience. Methods: We systematically gathered data from 3 different types of content creators: medical sources, news sources, and long haulers. To computationally understand the video content and viewers' reactions, we used Biterm, a topic modeling algorithm created specifically for short texts, to analyze snippets of video transcripts and all top-level comments from the comment section. To triangulate our findings about viewers' reactions, we used the Valence Aware Dictionary and Sentiment Reasoner to conduct sentiment analysis on comments from each type of content creator. 
We grouped the comments into positive and negative categories and generated topics for these groups using Biterm. We then manually grouped the resulting topics into broader themes for analysis. Results: We organized the resulting topics into 28 themes across all sources. Examples of medical source transcript themes were Explanations in layman's terms and Biological explanations. Examples of news source transcript themes were Negative experiences and Handling the long haul. The 2 long hauler transcript themes were Taking treatments into own hands and Changes to daily life. News sources received a greater share of negative comments. A few themes of these negative comments included Misinformation and disinformation and Issues with the health care system. Similarly, negative long hauler comments were organized into several themes, including Disillusionment with the health care system and Requiring more visibility. In contrast, positive medical source comments captured themes such as Appreciation of helpful content and Exchange of helpful information. Positive long hauler comments shared these themes and additionally included Community building. Conclusions: The results of this study could help public health agencies, policy makers, organizations, and health researchers understand symptomatology and experiences related to PCC. They could also help these agencies develop their communication strategy concerning PCC. ", doi="10.2196/54501", url="https://ai.jmir.org/2024/1/e54501", url="http://www.ncbi.nlm.nih.gov/pubmed/38875666" } @Article{info:doi/10.2196/51834, author="Quttainah, Majdi and Mishra, Vinaytosh and Madakam, Somayya and Lurie, Yotam and Mark, Shlomo", title="Cost, Usability, Credibility, Fairness, Accountability, Transparency, and Explainability Framework for Safe and Effective Large Language Models in Medical Education: Narrative Review and Qualitative Study", journal="JMIR AI", year="2024", month="Apr", day="23", volume="3", pages="e51834", keywords="large language model", keywords="LLM", keywords="ChatGPT", keywords="CUC-FATE framework", keywords="cost, usability, credibility, fairness, accountability, transparency, and explainability", keywords="analytical hierarchy process", keywords="AHP", keywords="total interpretive structural modeling", keywords="TISM", keywords="medical education", keywords="adoption", keywords="guideline", keywords="development", keywords="health care", keywords="chat generative pretrained transformer", keywords="generative language model tool", keywords="user", keywords="innovation", keywords="data generation", keywords="narrative review", keywords="health care professional", abstract="Background: The world has witnessed increased adoption of large language models (LLMs) in the last year. Although the products developed using LLMs have the potential to solve accessibility and efficiency problems in health care, there is a lack of available guidelines for developing LLMs for health care, especially for medical education. Objective: The aim of this study was to identify and prioritize the enablers for developing successful LLMs for medical education. We further evaluated the relationships among these identified enablers. Methods: A narrative review of the extant literature was first performed to identify the key enablers for LLM development. We additionally gathered the opinions of LLM users to determine the relative importance of these enablers using an analytical hierarchy process (AHP), which is a multicriteria decision-making method. 
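As a worked illustration of the AHP arithmetic behind the priority weights and consistency ratio reported in the Results that follow: weights are the normalized principal eigenvector of a reciprocal pairwise-comparison matrix, and the consistency ratio comes from its principal eigenvalue (the 4-by-4 matrix below is a toy example, not the study's judgments):

```python
# Minimal AHP sketch: priority weights from the principal eigenvector,
# plus the consistency ratio (CR < 0.1 is the usual acceptability cutoff).
import numpy as np

A = np.array([              # toy reciprocal pairwise-comparison matrix
    [1.0, 3.0, 5.0, 7.0],
    [1/3, 1.0, 3.0, 5.0],
    [1/5, 1/3, 1.0, 3.0],
    [1/7, 1/5, 1/3, 1.0],
])

eigvals, eigvecs = np.linalg.eig(A)
k = np.argmax(eigvals.real)             # index of principal eigenvalue
lam_max = eigvals.real[k]
weights = np.abs(eigvecs[:, k].real)
weights /= weights.sum()                # priority weights, sum to 1

n = A.shape[0]
CI = (lam_max - n) / (n - 1)            # consistency index
RI = {3: 0.58, 4: 0.90, 5: 1.12, 6: 1.24, 7: 1.32}[n]  # Saaty's random index
CR = CI / RI                            # consistency ratio

print("weights:", np.round(weights, 3), "CR:", round(CR, 3))
```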
Further, total interpretive structural modeling (TISM) was used to analyze the perspectives of product developers and ascertain the relationships and hierarchy among these enablers. Finally, the cross-impact matrix-based multiplication applied to a classification (MICMAC) approach was used to determine the relative driving and dependence powers of these enablers. A nonprobabilistic purposive sampling approach was used for recruitment of focus groups. Results: The AHP demonstrated that the most important enabler for LLMs was credibility, with a priority weight of 0.37, followed by accountability (0.28) and fairness (0.11). In contrast, usability, with a priority weight of 0.04, showed negligible importance. The results of TISM concurred with the findings of the AHP. The only striking difference between expert perspectives and user preference evaluation was that the product developers indicated that cost has the least importance as a potential enabler. The MICMAC analysis suggested that cost has a strong influence on other enablers. The inputs of the focus group were found to be reliable, with a consistency ratio less than 0.1 (0.084). Conclusions: This study is the first to identify, prioritize, and analyze the relationships of enablers of effective LLMs for medical education. Based on the results of this study, we developed a comprehensible prescriptive framework, named CUC-FATE (Cost, Usability, Credibility, Fairness, Accountability, Transparency, and Explainability), for evaluating the enablers of LLMs in medical education. The study findings are useful for health care professionals, health technology experts, medical technology regulators, and policy makers. ", doi="10.2196/51834", url="https://ai.jmir.org/2024/1/e51834", url="http://www.ncbi.nlm.nih.gov/pubmed/38875562" } @Article{info:doi/10.2196/40781, author="Waheed, Atif Muhammad and Liu, Lu", title="Perceptions of Family Physicians About Applying AI in Primary Health Care: Case Study From a Premier Health Care Organization", journal="JMIR AI", year="2024", month="Apr", day="17", volume="3", pages="e40781", keywords="AI", keywords="artificial intelligence", keywords="perception", keywords="attitude", keywords="opinion", keywords="surveys and questionnaires", keywords="family physician", keywords="primary care", keywords="health care service provider", keywords="health care professional", keywords="ethical", keywords="AI decision-making", keywords="AI challenges", abstract="Background: The COVID-19 pandemic has led to a rapid and largely unanticipated proliferation of artificial intelligence (AI). The use of AI in health care settings is increasing, as it proves to be a promising tool for transforming health care systems, improving operational and business processes, and efficiently simplifying health care tasks for family physicians and health care administrators. Therefore, it is necessary to assess the perspective of family physicians on AI and its impact on their job roles. Objective: This study aims to determine the impact of AI on the management and practices of Qatar's Primary Health Care Corporation (PHCC) in improving health care tasks and service delivery. Furthermore, it seeks to evaluate the impact of AI on family physicians' job roles, including associated risks and ethical ramifications from their perspective. Methods: We conducted a cross-sectional survey and sent a web-based questionnaire link to 724 practicing family physicians at the PHCC. 
In total, we received 102 eligible responses. Results: Of the 102 respondents, 72 (70.6\%) were men and 94 (92.2\%) were aged between 35 and 54 years. In addition, 58 (56.9\%) of the 102 respondents were consultants. Overall, 80 (78.4\%) of the 102 respondents were aware of AI, with no difference between genders (P=.06) or age groups (P=.12). AI is perceived to play a positive role in improving health care practices at PHCC (P<.001), managing health care tasks (P<.001), and positively impacting health care service delivery (P<.001). Family physicians also perceived that their clinical, administrative, and opportunistic health care management roles were positively influenced by AI (P<.001). Furthermore, perceptions of family physicians indicate that AI improves operational and human resource management (P<.001), does not undermine patient-physician relationships (P<.001), and is not considered superior to human physicians in the clinical judgment process (P<.001). However, its inclusion is believed to decrease patient satisfaction (P<.001). AI decision-making and accountability were recognized as ethical risks, along with data protection and confidentiality. Optimism regarding the use of AI for future medical decisions was low among family physicians. Conclusions: This study indicated a positive perception among family physicians regarding AI integration into primary care settings. AI demonstrates significant potential for enhancing health care task management and overall service delivery at the PHCC. It augments family physicians' roles without replacing them and proves beneficial for operational efficiency, human resource management, and public health during pandemics. While the implementation of AI is anticipated to bring benefits, the careful consideration of ethical, privacy, confidentiality, and patient-centric concerns is essential. These insights provide valuable guidance for the strategic integration of AI into health care systems, with a focus on maintaining high-quality patient care and addressing the multifaceted challenges that arise during this transformative process. ", doi="10.2196/40781", url="https://ai.jmir.org/2024/1/e40781", url="http://www.ncbi.nlm.nih.gov/pubmed/38875531" } @Article{info:doi/10.2196/47652, author="Sp{\"a}th, Julian and Sewald, Zeno and Probul, Niklas and Berland, Magali and Almeida, Mathieu and Pons, Nicolas and Le Chatelier, Emmanuelle and Gin{\`e}s, Pere and Sol{\'e}, Cristina and Juanola, Adri{\`a} and Pauling, Josch and Baumbach, Jan", title="Privacy-Preserving Federated Survival Support Vector Machines for Cross-Institutional Time-To-Event Analysis: Algorithm Development and Validation", journal="JMIR AI", year="2024", month="Mar", day="29", volume="3", pages="e47652", keywords="federated learning", keywords="survival analysis", keywords="support vector machine", keywords="machine learning", keywords="federated", keywords="algorithm", keywords="survival", keywords="FeatureCloud", keywords="predict", keywords="predictive", keywords="prediction", keywords="predictions", keywords="Implementation science", keywords="Implementation", keywords="centralized model", keywords="privacy regulation", abstract="Background: Central collection of distributed medical patient data is problematic due to strict privacy regulations. Especially in clinical environments, such as clinical time-to-event studies, large sample sizes are critical but usually not available at a single institution. 
It has been shown recently that federated learning, combined with privacy-enhancing technologies, is an excellent and privacy-preserving alternative to data sharing. Objective: This study aims to develop and validate a privacy-preserving, federated survival support vector machine (SVM) and make it accessible for researchers to perform cross-institutional time-to-event analyses. Methods: We extended the survival SVM algorithm to be applicable in federated environments. We further implemented it as a FeatureCloud app, enabling it to run in the federated infrastructure provided by the FeatureCloud platform. Finally, we evaluated our algorithm on 3 benchmark data sets, a large sample size synthetic data set, and a real-world microbiome data set and compared the results to the corresponding central method. Results: Our federated survival SVM produces highly similar results to the centralized model on all data sets. The maximal difference between the model weights of the central model and the federated model was only 0.001, and the mean difference over all data sets was 0.0002. We further show that by including more data in the analysis through federated learning, predictions are more accurate even in the presence of site-dependent batch effects. Conclusions: The federated survival SVM extends the palette of federated time-to-event analysis methods by a robust machine learning approach. To our knowledge, the implemented FeatureCloud app is the first publicly available implementation of a federated survival SVM, is freely accessible for all kinds of researchers, and can be directly used within the FeatureCloud platform. ", doi="10.2196/47652", url="https://ai.jmir.org/2024/1/e47652", url="http://www.ncbi.nlm.nih.gov/pubmed/38875678" } @Article{info:doi/10.2196/52054, author="Wiepert, Daniela and Malin, A. Bradley and Duffy, R. Joseph and Utianski, L. Rene and Stricker, L. John and Jones, T. David and Botha, Hugo", title="Reidentification of Participants in Shared Clinical Data Sets: Experimental Study", journal="JMIR AI", year="2024", month="Mar", day="15", volume="3", pages="e52054", keywords="reidentification", keywords="privacy", keywords="adversarial attack", keywords="health care", keywords="speech disorders", keywords="voiceprint", abstract="Background: Large curated data sets are required to leverage speech-based tools in health care. These are costly to produce, resulting in increased interest in data sharing. As speech can potentially identify speakers (ie, voiceprints), sharing recordings raises privacy concerns. This is especially relevant when working with patient data protected under the Health Insurance Portability and Accountability Act. Objective: We aimed to determine the reidentification risk for speech recordings, without reference to demographics or metadata, in clinical data sets considering both the size of the search space (ie, the number of comparisons that must be considered when reidentifying) and the nature of the speech recording (ie, the type of speech task). Methods: Using a state-of-the-art speaker identification model, we modeled an adversarial attack scenario in which an adversary uses a large data set of identified speech (hereafter, the known set) to reidentify as many unknown speakers in a shared data set (hereafter, the unknown set) as possible. 
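A minimal sketch of this adversarial matching step, using random toy embeddings rather than a real speaker identification model (so any matches here are chance-level), shows how the true-acceptance and false-acceptance counts discussed in the Results arise:

```python
# Minimal sketch: compare embeddings of unknown recordings against a
# known gallery with cosine similarity, accept above a threshold, and
# count true acceptances (TAs) vs false acceptances (FAs).
import numpy as np

rng = np.random.default_rng(0)
known = rng.normal(size=(1000, 192))      # gallery of identified speakers
unknown = rng.normal(size=(50, 192))      # shared, de-identified recordings
true_id = rng.integers(0, 1000, size=50)  # ground truth, used only to score

def normalize(x):
    return x / np.linalg.norm(x, axis=1, keepdims=True)

sims = normalize(unknown) @ normalize(known).T  # cosine similarity matrix
best = sims.argmax(axis=1)                      # closest gallery identity
accepted = sims.max(axis=1) > 0.25              # acceptance threshold

ta = np.sum(accepted & (best == true_id))       # true acceptances
fa = np.sum(accepted & (best != true_id))       # false acceptances
print("TA:", ta, "FA:", fa)
```

As the gallery (the search space) grows, the number of chance matches above the threshold rises, which is the FA growth pattern the entry reports.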
We first considered the effect of search space size by attempting reidentification with various sizes of known and unknown sets using VoxCeleb, a data set with recordings of natural, connected speech from >7000 healthy speakers. We then repeated these tests with different types of recordings in each set to examine whether the nature of a speech recording influences reidentification risk. For these tests, we used our clinical data set composed of recordings of elicited speech tasks from 941 speakers. Results: We found that the risk was inversely related to the number of comparisons an adversary must consider (ie, the search space), with a positive linear correlation between the number of false acceptances (FAs) and the number of comparisons (r=0.69; P<.001). The true acceptances (TAs) stayed relatively stable, and the ratio between FAs and TAs rose from 0.02 at $1 \times 10^{5}$ comparisons to 1.41 at $6 \times 10^{6}$ comparisons, with a near 1:1 ratio at the midpoint of $3 \times 10^{6}$ comparisons. In effect, risk was high for a small search space but dropped as the search space grew. We also found that the nature of a speech recording influenced reidentification risk, with nonconnected speech (eg, vowel prolongation: FA/TA=98.5; alternating motion rate: FA/TA=8) being harder to identify than connected speech (eg, sentence repetition: FA/TA=0.54) in cross-task conditions. The inverse was mostly true in within-task conditions, with the FA/TA ratio for vowel prolongation and alternating motion rate dropping to 0.39 and 1.17, respectively. Conclusions: Our findings suggest that speaker identification models can be used to reidentify participants in specific circumstances, but in practice, the reidentification risk appears small. The variation in risk due to search space size and type of speech task provides actionable recommendations to further increase participant privacy and considerations for policy regarding public release of speech recordings. ", doi="10.2196/52054", url="https://ai.jmir.org/2024/1/e52054", url="http://www.ncbi.nlm.nih.gov/pubmed/38875581" } @Article{info:doi/10.2196/47240, author="Lu, Jiahui and Zhang, Huibin and Xiao, Yi and Wang, Yingyu", title="An Environmental Uncertainty Perception Framework for Misinformation Detection and Spread Prediction in the COVID-19 Pandemic: Artificial Intelligence Approach", journal="JMIR AI", year="2024", month="Jan", day="29", volume="3", pages="e47240", keywords="misinformation detection", keywords="misinformation spread prediction", keywords="uncertainty", keywords="COVID-19", keywords="information environment", abstract="Background: Amidst the COVID-19 pandemic, misinformation on social media has posed significant threats to public health. Detecting and predicting the spread of misinformation are crucial for mitigating its adverse effects. However, prevailing frameworks for these tasks have predominantly focused on post-level signals of misinformation, neglecting features of the broader information environment where misinformation originates and proliferates. Objective: This study aims to create a novel framework that integrates the uncertainty of the information environment into misinformation features, with the goal of enhancing the model's accuracy in tasks such as misinformation detection and predicting the scale of dissemination. The objective is to provide better support for online governance efforts during health crises. 
Methods: In this study, we incorporated uncertainty features of the information environment and introduced a novel Environmental Uncertainty Perception (EUP) framework for the detection of misinformation and the prediction of its spread on social media. The framework encompasses uncertainty at 4 scales of the information environment: physical environment, macro-media environment, micro-communicative environment, and message framing. We assessed the effectiveness of the EUP using real-world COVID-19 misinformation data sets. Results: The experimental results demonstrated that the EUP alone achieved notably good performance, with detection accuracy at 0.753 and prediction accuracy at 0.71. These results were comparable to state-of-the-art baseline models such as bidirectional long short-term memory (BiLSTM; detection accuracy 0.733 and prediction accuracy 0.707) and bidirectional encoder representations from transformers (BERT; detection accuracy 0.755 and prediction accuracy 0.728). Additionally, when the baseline models were combined with the EUP, they exhibited improved accuracy by an average of 1.98\% for misinformation detection and 2.4\% for spread prediction. On unbalanced data sets, the EUP yielded relative improvements of 21.5\% and 5.7\% in macro-F1-score and area under the curve, respectively. Conclusions: This study makes a significant contribution to the literature by recognizing uncertainty features within information environments as a crucial factor for improving misinformation detection and spread-prediction algorithms during the pandemic. The research elaborates on the complexities of uncertain information environments for misinformation across 4 distinct scales, including the physical environment, macro-media environment, micro-communicative environment, and message framing. The findings underscore the effectiveness of incorporating uncertainty into misinformation detection and spread prediction, providing an interdisciplinary and easily implementable framework for the field. ", doi="10.2196/47240", url="https://ai.jmir.org/2024/1/e47240", url="http://www.ncbi.nlm.nih.gov/pubmed/38875583" } @Article{info:doi/10.2196/49082, author="Hansen, Steffan and Brandt, Joakim Carl and S{\o}ndergaard, Jens", title="Beyond the Hype---The Actual Role and Risks of AI in Today's Medical Practice: Comparative-Approach Study", journal="JMIR AI", year="2024", month="Jan", day="22", volume="3", pages="e49082", keywords="AI", keywords="artificial intelligence", keywords="ChatGPT-4", keywords="Microsoft Bing", keywords="general practice", keywords="ChatGPT", keywords="chatbot", keywords="chatbots", keywords="writing", keywords="academic", keywords="academia", keywords="Bing", abstract="Background: The evolution of artificial intelligence (AI) has significantly impacted various sectors, with health care witnessing some of its most groundbreaking contributions. Contemporary models, such as ChatGPT-4 and Microsoft Bing, have showcased capabilities beyond just generating text, aiding in complex tasks like literature searches and refining web-based queries. Objective: This study explores a compelling query: can AI author an academic paper independently? 
Our assessment focuses on four core dimensions: relevance (to ensure that AI's response directly addresses the prompt), accuracy (to ascertain that AI's information is both factually correct and current), clarity (to examine AI's ability to present coherent and logical ideas), and tone and style (to evaluate whether AI can align with the formality expected in academic writings). Additionally, we will consider the ethical implications and practicality of integrating AI into academic writing. Methods: To assess the capabilities of ChatGPT-4 and Microsoft Bing in the context of academic paper assistance in general practice, we used a systematic approach. ChatGPT-4, an advanced AI language model by OpenAI, excels in generating human-like text and adapting responses based on user interactions, though it has a knowledge cut-off in September 2021. Microsoft Bing's AI chatbot facilitates user navigation on the Bing search engine, offering tailored search results. Results: In terms of relevance, ChatGPT-4 delved deeply into AI's health care role, citing academic sources and discussing diverse applications and concerns, while Microsoft Bing provided a concise, less detailed overview. In terms of accuracy, ChatGPT-4 correctly cited 72\% (23/32) of its peer-reviewed articles but included some nonexistent references. Microsoft Bing's accuracy stood at 46\% (6/13), supplemented by relevant non--peer-reviewed articles. In terms of clarity, both models conveyed clear, coherent text. ChatGPT-4 was particularly adept at detailing technical concepts, while Microsoft Bing was more general. In terms of tone and style, both models maintained an academic tone, but ChatGPT-4 exhibited superior depth and breadth in content delivery. Conclusions: Comparing ChatGPT-4 and Microsoft Bing for academic assistance revealed strengths and limitations. ChatGPT-4 excels in depth and relevance but falters in citation accuracy. Microsoft Bing is concise but lacks robust detail. Though both models have potential, neither can independently handle comprehensive academic tasks. As AI evolves, combining ChatGPT-4's depth with Microsoft Bing's up-to-date referencing could optimize academic support. Researchers should critically assess AI outputs to maintain academic credibility. ", doi="10.2196/49082", url="https://ai.jmir.org/2024/1/e49082" } @Article{info:doi/10.2196/51204, author="Weidener, Lukas and Fischer, Michael", title="Role of Ethics in Developing AI-Based Applications in Medicine: Insights From Expert Interviews and Discussion of Implications", journal="JMIR AI", year="2024", month="Jan", day="12", volume="3", pages="e51204", keywords="artificial intelligence", keywords="AI", keywords="medicine", keywords="ethics", keywords="expert interviews", keywords="AI development", keywords="AI ethics", abstract="Background: The integration of artificial intelligence (AI)--based applications in the medical field has increased significantly, offering potential improvements in patient care and diagnostics. However, alongside these advancements, there is growing concern about ethical considerations, such as bias, informed consent, and trust in the development of these technologies. Objective: This study aims to assess the role of ethics in the development of AI-based applications in medicine. Furthermore, this study focuses on the potential consequences of neglecting ethical considerations in AI development, particularly their impact on patients and physicians. 
Methods: Qualitative content analysis was used to analyze the responses from expert interviews. Experts were selected based on their involvement in the research or practical development of AI-based applications in medicine for at least 5 years, leading to the inclusion of 7 experts in the study. Results: The analysis revealed 3 main categories and 7 subcategories reflecting a wide range of views on the role of ethics in AI development. This variance underscores the subjectivity and complexity of integrating ethics into the development of AI in medicine. Although some experts view ethics as fundamental, others prioritize performance and efficiency, with some perceiving ethics as potential obstacles to technological progress. This dichotomy of perspectives clearly emphasizes the subjectivity and complexity surrounding the role of ethics in AI development, reflecting the inherent multifaceted nature of this issue. Conclusions: Despite the methodological limitations impacting the generalizability of the results, this study underscores the critical importance of consistent and integrated ethical considerations in AI development for medical applications. It advocates further research into effective strategies for ethical AI development, emphasizing the need for transparent and responsible practices, consideration of diverse data sources, physician training, and the establishment of comprehensive ethical and legal frameworks. ", doi="10.2196/51204", url="https://ai.jmir.org/2024/1/e51204", url="http://www.ncbi.nlm.nih.gov/pubmed/38875585" } @Article{info:doi/10.2196/52888, author="Hendricks-Sturrup, Rachele and Simmons, Malaika and Anders, Shilo and Aneni, Kammarauche and Wright Clayton, Ellen and Coco, Joseph and Collins, Benjamin and Heitman, Elizabeth and Hussain, Sajid and Joshi, Karuna and Lemieux, Josh and Lovett Novak, Laurie and Rubin, J. Daniel and Shanker, Anil and Washington, Talitha and Waters, Gabriella and Webb Harris, Joyce and Yin, Rui and Wagner, Teresa and Yin, Zhijun and Malin, Bradley", title="Developing Ethics and Equity Principles, Terms, and Engagement Tools to Advance Health Equity and Researcher Diversity in AI and Machine Learning: Modified Delphi Approach", journal="JMIR AI", year="2023", month="Dec", day="6", volume="2", pages="e52888", keywords="artificial intelligence", keywords="AI", keywords="Delphi", keywords="disparities", keywords="disparity", keywords="engagement", keywords="equitable", keywords="equities", keywords="equity", keywords="ethic", keywords="ethical", keywords="ethics", keywords="fair", keywords="fairness", keywords="health disparities", keywords="health equity", keywords="humanitarian", keywords="machine learning", keywords="ML", abstract="Background: Artificial intelligence (AI) and machine learning (ML) technology design and development continues to be rapid, despite major limitations in its current form as a practice and discipline to address all sociohumanitarian issues and complexities. From these limitations emerges an imperative to strengthen AI and ML literacy in underserved communities and build a more diverse AI and ML design and development workforce engaged in health research. Objective: AI and ML has the potential to account for and assess a variety of factors that contribute to health and disease and to improve prevention, diagnosis, and therapy. 
Here, we describe recent activities within the Artificial Intelligence/Machine Learning Consortium to Advance Health Equity and Researcher Diversity (AIM-AHEAD) Ethics and Equity Workgroup (EEWG) that led to the development of deliverables that will help put ethics and fairness at the forefront of AI and ML applications to build equity in biomedical research, education, and health care. Methods: The AIM-AHEAD EEWG was created in 2021 with 3 cochairs and 51 members in year 1 and 2 cochairs and {\textasciitilde}40 members in year 2. Members in both years included AIM-AHEAD principal investigators, coinvestigators, leadership fellows, and research fellows. The EEWG used a modified Delphi approach using polling, ranking, and other exercises to facilitate discussions around tangible steps, key terms, and definitions needed to ensure that ethics and fairness are at the forefront of AI and ML applications to build equity in biomedical research, education, and health care. Results: The EEWG developed a set of ethics and equity principles, a glossary, and an interview guide. The ethics and equity principles comprise 5 core principles, each with subparts, which articulate best practices for working with stakeholders from historically and presently underrepresented communities. The glossary contains 12 terms and definitions, with particular emphasis on optimal development, refinement, and implementation of AI and ML in health equity research. To accompany the glossary, the EEWG developed a concept relationship diagram that describes the logical flow of and relationship between the definitional concepts. Lastly, the interview guide provides questions that can be used or adapted to garner stakeholder and community perspectives on the principles and glossary. Conclusions: Ongoing engagement is needed around our principles and glossary to identify and predict potential limitations in their uses in AI and ML research settings, especially for institutions with limited resources. This requires time, careful consideration, and honest discussions around what classifies an engagement incentive as meaningful to support and sustain their full engagement. By slowing down to meet historically and presently underresourced institutions and communities where they are and where they are capable of engaging and competing, there is higher potential to achieve needed diversity, ethics, and equity in AI and ML implementation in health research. ", doi="10.2196/52888", url="https://ai.jmir.org/2023/1/e52888", url="http://www.ncbi.nlm.nih.gov/pubmed/38875540" } @Article{info:doi/10.2196/47353, author="Hummelsberger, Pia and Koch, K. Timo and Rauh, Sabrina and Dorn, Julia and Lermer, Eva and Raue, Martina and Hudecek, C. Matthias F. and Schicho, Andreas and Colak, Errol and Ghassemi, Marzyeh and Gaube, Susanne", title="Insights on the Current State and Future Outlook of AI in Health Care: Expert Interview Study", journal="JMIR AI", year="2023", month="Oct", day="31", volume="2", pages="e47353", keywords="artificial intelligence", keywords="AI", keywords="machine learning", keywords="health care", keywords="digital health technology", keywords="technology implementation", keywords="expert interviews", keywords="mixed methods", keywords="topic modeling", abstract="Background: Artificial intelligence (AI) is often promoted as a potential solution for many challenges health care systems face worldwide. However, its implementation in clinical practice lags behind its technological development. 
Objective: This study aims to gain insights into the current state and prospects of AI technology from the stakeholders most directly involved in its adoption in the health care sector whose perspectives have received limited attention in research to date. Methods: For this purpose, the perspectives of AI researchers and health care IT professionals in North America and Western Europe were collected and compared for profession-specific and regional differences. In this preregistered, mixed methods, cross-sectional study, 23 experts were interviewed using a semistructured guide. Data from the interviews were analyzed using deductive and inductive qualitative methods for the thematic analysis along with topic modeling to identify latent topics. Results: Through our thematic analysis, four major categories emerged: (1) the current state of AI systems in health care, (2) the criteria and requirements for implementing AI systems in health care, (3) the challenges in implementing AI systems in health care, and (4) the prospects of the technology. Experts discussed the capabilities and limitations of current AI systems in health care in addition to their prevalence and regional differences. Several criteria and requirements deemed necessary for the successful implementation of AI systems were identified, including the technology's performance and security, smooth system integration and human-AI interaction, costs, stakeholder involvement, and employee training. However, regulatory, logistical, and technical issues were identified as the most critical barriers to an effective technology implementation process. In the future, our experts predicted both various threats and many opportunities related to AI technology in the health care sector. Conclusions: Our work provides new insights into the current state, criteria, challenges, and outlook for implementing AI technology in health care from the perspective of AI researchers and IT professionals in North America and Western Europe. For the full potential of AI-enabled technologies to be exploited and for them to contribute to solving current health care challenges, critical implementation criteria must be met, and all groups involved in the process must work together. ", doi="10.2196/47353", url="https://ai.jmir.org/2023/1/e47353", url="http://www.ncbi.nlm.nih.gov/pubmed/38875571" } @Article{info:doi/10.2196/47449, author="Kim, Paik Jane and Ryan, Katie and Kasun, Max and Hogg, Justin and Dunn, B. Laura and Roberts, Weiss Laura", title="Physicians' and Machine Learning Researchers' Perspectives on Ethical Issues in the Early Development of Clinical Machine Learning Tools: Qualitative Interview Study", journal="JMIR AI", year="2023", month="Oct", day="30", volume="2", pages="e47449", keywords="artificial intelligence", keywords="machine learning", keywords="ethical considerations", keywords="qualitative study", keywords="qualitative", keywords="ethic", keywords="ethics", keywords="ethical", keywords="perspective", abstract="Background: Innovative tools leveraging artificial intelligence (AI) and machine learning (ML) are rapidly being developed for medicine, with new applications emerging in prediction, diagnosis, and treatment across a range of illnesses, patient populations, and clinical procedures. One barrier for successful innovation is the scarcity of research in the current literature seeking and analyzing the views of AI or ML researchers and physicians to support ethical guidance. 
Objective: This study aims to describe, using a qualitative approach, the landscape of ethical issues that AI or ML researchers and physicians with professional exposure to AI or ML tools observe or anticipate in the development and use of AI and ML in medicine. Methods: Semistructured interviews were used to facilitate in-depth, open-ended discussion, and a purposeful sampling technique was used to identify and recruit participants. We conducted 21 semistructured interviews with a purposeful sample of AI and ML researchers (n=10) and physicians (n=11). We asked interviewees about their views regarding ethical considerations related to the adoption of AI and ML in medicine. Interviews were transcribed and deidentified by members of our research team. Data analysis was guided by the principles of qualitative content analysis. This approach, in which transcribed data is broken down into descriptive units that are named and sorted based on their content, allows for the inductive emergence of codes directly from the data set. Results: Notably, both researchers and physicians articulated concerns regarding how AI and ML innovations are shaped in their early development (ie, the problem formulation stage). Considerations encompassed the assessment of research priorities and motivations, clarity and centeredness of clinical needs, professional and demographic diversity of research teams, and interdisciplinary knowledge generation and collaboration. Phase-1 ethical issues identified by interviewees were notably interdisciplinary in nature and invited questions regarding how to align priorities and values across disciplines and ensure clinical value throughout the development and implementation of medical AI and ML. Relatedly, interviewees suggested interdisciplinary solutions to these issues, for example, more resources to support knowledge generation and collaboration between developers and physicians, engagement with a broader range of stakeholders, and efforts to increase diversity in research broadly and within individual teams. Conclusions: These qualitative findings help elucidate several ethical challenges anticipated or encountered in AI and ML for health care. Our study is unique in that its use of open-ended questions allowed interviewees to explore their sentiments and perspectives without overreliance on implicit assumptions about what AI and ML currently are or are not. This analysis, however, does not include the perspectives of other relevant stakeholder groups, such as patients, ethicists, industry researchers or representatives, or other health care professionals beyond physicians. Additional qualitative and quantitative research is needed to reproduce and build on these findings. ", doi="10.2196/47449", url="https://ai.jmir.org/2023/1/e47449", url="http://www.ncbi.nlm.nih.gov/pubmed/38875536" } @Article{info:doi/10.2196/47223, author="Malgaroli, Matteo and Tseng, Emily and Hull, D. Thomas and Jennings, Emma and Choudhury, K. Tanzeem and Simon, M. 
Naomi", title="Association of Health Care Work With Anxiety and Depression During the COVID-19 Pandemic: Structural Topic Modeling Study", journal="JMIR AI", year="2023", month="Oct", day="24", volume="2", pages="e47223", keywords="depression", keywords="anxiety", keywords="health care workers", keywords="COVID-19", keywords="natural language processing", keywords="topic modeling", keywords="stressor", keywords="mental health", keywords="treatment", keywords="psychotherapy", keywords="digital health", abstract="Background: Stressors for health care workers (HCWs) during the COVID-19 pandemic have been manifold, with high levels of depression and anxiety alongside gaps in care. Identifying the factors most tied to HCWs' psychological challenges is crucial to addressing HCWs' mental health needs effectively, now and for future large-scale events. Objective: In this study, we used natural language processing methods to examine deidentified psychotherapy transcripts from telemedicine treatment during the initial wave of COVID-19 in the United States. Psychotherapy was delivered by licensed therapists while HCWs were managing increased clinical demands and elevated hospitalization rates, in addition to population-level social distancing measures and infection risks. Our goal was to identify specific concerns emerging in treatment for HCWs and to compare differences with matched non-HCW patients from the general population. Methods: We conducted a case-control study with a sample of 820 HCWs and 820 non-HCW matched controls who received digitally delivered psychotherapy in 49 US states in the spring of 2020 during the first US wave of the COVID-19 pandemic. Depression was measured during the initial assessment using the Patient Health Questionnaire-9, and anxiety was measured using the General Anxiety Disorder-7 questionnaire. Structural topic models (STMs) were used to determine treatment topics from deidentified transcripts from the first 3 weeks of treatment. STM effect estimators were also used to examine topic prevalence in patients with moderate to severe anxiety and depression. Results: The median treatment enrollment date was April 15, 2020 (IQR March 31 to April 27, 2020) for HCWs and April 19, 2020 (IQR April 5 to April 27, 2020) for matched controls. STM analysis of deidentified transcripts identified 4 treatment topics centered on health care and 5 on mental health for HCWs. For controls, 3 STM topics on pandemic-related disruptions and 5 on mental health were identified. Several STM treatment topics were significantly associated with moderate to severe anxiety and depression, including working on the hospital unit (topic prevalence 0.035, 95\% CI 0.022-0.048; P<.001), mood disturbances (prevalence 0.014, 95\% CI 0.002-0.026; P=.03), and sleep disturbances (prevalence 0.016, 95\% CI 0.002-0.030; P=.02). No significant associations emerged between pandemic-related topics and moderate to severe anxiety and depression for non-HCW controls. Conclusions: The study provides large-scale quantitative evidence that during the initial wave of the COVID-19 pandemic, HCWs faced unique work-related challenges and stressors associated with anxiety and depression, which required dedicated treatment efforts. The study further demonstrates how natural language processing methods have the potential to surface clinically relevant markers of distress while preserving patient privacy. 
", doi="10.2196/47223", url="https://ai.jmir.org/2023/1/e47223", url="http://www.ncbi.nlm.nih.gov/pubmed/38875560" } @Article{info:doi/10.2196/43483, author="Saraswat, Nidhi and Li, Chuqin and Jiang, Min", title="Identifying the Question Similarity of Regulatory Documents in the Pharmaceutical Industry by Using the Recognizing Question Entailment System: Evaluation Study", journal="JMIR AI", year="2023", month="Sep", day="26", volume="2", pages="e43483", keywords="regulatory affairs", keywords="frequently asked questions", keywords="FAQs", keywords="Recognizing Question Entailment system", keywords="RQE system", keywords="transformer-based models", keywords="textual data augmentations", abstract="Background: The regulatory affairs (RA) division in a pharmaceutical establishment is the point of contact between regulatory authorities and pharmaceutical companies. They are delegated the crucial and strenuous task of extracting and summarizing relevant information in the most meticulous manner from various search systems. An artificial intelligence (AI)--based intelligent search system that can significantly bring down the manual efforts in the existing processes of the RA department while maintaining and improving the quality of final outcomes is desirable. We proposed a ``frequently asked questions'' component and its utility in an AI-based intelligent search system in this paper. The scenario is further complicated by the lack of publicly available relevant data sets in the RA domain to train the machine learning models that can facilitate cognitive search systems for regulatory authorities. Objective: In this study, we aimed to use AI-based intelligent computational models to automatically recognize semantically similar question pairs in the RA domain and evaluate the Recognizing Question Entailment--based system. Methods: We used transfer learning techniques and experimented with transformer-based models pretrained on corpora collected from different resources, such as Bidirectional Encoder Representations from Transformers (BERT), Clinical BERT, BioBERT, and BlueBERT. We used a manually labeled data set that contained 150 question pairs in the pharmaceutical regulatory domain to evaluate the performance of our model. Results: The Clinical BERT model performed better than other domain-specific BERT-based models in identifying question similarity from the RA domain. The BERT model had the best ability to learn domain-specific knowledge with transfer learning, which reached the best performance when fine-tuned with sufficient clinical domain question pairs. The top-performing model achieved an accuracy of 90.66\% on the test set. Conclusions: This study demonstrates the possibility of using pretrained language models to recognize question similarity in the pharmaceutical regulatory domain. Transformer-based models that are pretrained on clinical notes perform better than models pretrained on biomedical text in recognizing the question's semantic similarity in this domain. We also discuss the challenges of using data augmentation techniques to address the lack of relevant data in this domain. The results of our experiment indicated that increasing the number of training samples using back translation and entity replacement did not enhance the model's performance. This lack of improvement may be attributed to the intricate and specialized nature of texts in the regulatory domain. 
Our work provides the foundation for further studies that apply state-of-the-art linguistic models to regulatory documents in the pharmaceutical industry. ", doi="10.2196/43483", url="https://ai.jmir.org/2023/1/e43483" } @Article{info:doi/10.2196/46487, author="Robinson, Renee and Liday, Cara and Lee, Sarah and Williams, C. Ishan and Wright, Melanie and An, Sungjoon and Nguyen, Elaine", title="Artificial Intelligence in Health Care---Understanding Patient Information Needs and Designing Comprehensible Transparency: Qualitative Study", journal="JMIR AI", year="2023", month="Jun", day="19", volume="2", pages="e46487", keywords="artificial intelligence", keywords="machine learning", keywords="diabetes", keywords="equipment safety", keywords="equipment design", keywords="health care", abstract="Background: Artificial intelligence (AI) is a branch of computer science that uses advanced computational methods, such as machine learning (ML), to calculate and predict health outcomes and address patient and provider health needs. While these technologies show great promise for improving health care, especially in diabetes management, there are usability and safety concerns for both patients and providers about the use of AI/ML in health care management. Objective: We aimed to support and ensure safe use of AI/ML technologies in health care; thus, the team worked to better understand (1) patient information and training needs, (2) the factors that influence patients' perceived value and trust in AI/ML health care applications, and (3) how best to support safe and appropriate use of AI/ML-enabled devices and applications among people living with diabetes. Methods: To understand general patient perspectives and information needs related to the use of AI/ML in health care, we conducted a series of focus groups (n=9) and interviews (n=3) with patients (n=41) and interviews with providers (n=6) in Alaska, Idaho, and Virginia. Grounded theory guided data gathering, synthesis, and analysis. Thematic content and constant comparison analysis were used to identify relevant themes and subthemes. Inductive approaches were used to link data to key concepts, including preferred patient-provider interactions and patient perceptions of trust, accuracy, value, assurances, and information transparency. Results: Key summary themes and recommendations focused on (1) patient preferences for AI/ML-enabled device and application information, (2) patient and provider AI/ML-related device and application training needs, (3) factors contributing to patient and provider trust in AI/ML-enabled devices and applications, and (4) AI/ML-related device and application functionality and safety considerations. Several participants (patients and providers) made recommendations to improve device functionality and to guide information and labeling mandates (eg, link to online video resources and provide access to 24/7 live in-person or virtual emergency support). Other patient recommendations included (1) providing access to practice devices, (2) providing connections to local supports and reputable community resources, and (3) simplifying the display and alert limits. Conclusions: Recommendations from both patients and providers could be used by federal oversight agencies to improve the monitoring of AI/ML technology use in diabetes, improving device safety and efficacy. 
", doi="10.2196/46487", url="https://ai.jmir.org/2023/1/e46487", url="http://www.ncbi.nlm.nih.gov/pubmed/38333424" } @Article{info:doi/10.2196/47283, author="Benjamens, Stan and Dhunnoo, Pranavsingh and G{\"o}r{\"o}g, M{\'a}rton and Mesko, Bertalan", title="Forecasting Artificial Intelligence Trends in Health Care: Systematic International Patent Analysis", journal="JMIR AI", year="2023", month="May", day="26", volume="2", pages="e47283", keywords="artificial intelligence", keywords="patent", keywords="healthcare", keywords="health care", keywords="medical", keywords="forecasting", keywords="future", keywords="AI", keywords="machine learning", keywords="medical device", keywords="open-access", keywords="AI technology", abstract="Background: Artificial intelligence (AI)-- and machine learning (ML)--based medical devices and algorithms are rapidly changing the medical field. To provide an insight into the trends in AI and ML in health care, we conducted an international patent analysis. Objective: It is pivotal to obtain a clear overview on upcoming AI and MLtrends in health care to provide regulators with a better position to foresee what technologies they will have to create regulations for, which are not yet available on the market. Therefore, in this study, we provide insights and forecasts into the trends in AI and ML in health care by conducting an international patent analysis. Methods: A systematic patent analysis, focusing on AI- and ML-based patents in health care, was performed using the Espacenet database (from January 2012 until July 2022). This database includes patents from the China National Intellectual Property Administration, European Patent Office, Japan Patent Office, Korean Intellectual Property Office, and the United States Patent and Trademark Office. Results: We identified 10,967 patents: 7332 (66.9\%) from the China National Intellectual Property Administration, 191 (1.7\%) from the European Patent Office, 163 (1.5\%) from the Japan Patent Office, 513 (4.7\%) from the Korean Intellectual Property Office, and 2768 (25.2\%) from the United States Patent and Trademark Office. The number of published patents showed a yearly doubling from 2015 until 2021. Five international companies that had the greatest impact on this increase were Ping An Medical and Healthcare Management Co Ltd with 568 (5.2\%) patents, Siemens Healthineers with 273 (2.5\%) patents, IBM Corp with 226 (2.1\%) patents, Philips Healthcare with 150 (1.4\%) patents, and Shanghai United Imaging Healthcare Co Ltd with 144 (1.3\%) patents. Conclusions: This international patent analysis showed a linear increase in patents published by the 5 largest patent offices. An open access database with interactive search options was launched for AI- and ML-based patents in health care. ", doi="10.2196/47283", url="https://ai.jmir.org/2023/1/e47283", url="http://www.ncbi.nlm.nih.gov/pubmed/10449890" } @Article{info:doi/10.2196/41205, author="Owen, David and Antypas, Dimosthenis and Hassoulas, Athanasios and Pardi{\~n}as, F. 
Antonio and Espinosa-Anke, Luis and Collados, Camacho Jose", title="Enabling Early Health Care Intervention by Detecting Depression in Users of Web-Based Forums Using Language Models: Longitudinal Analysis and Evaluation", journal="JMIR AI", year="2023", month="Mar", day="24", volume="2", pages="e41205", keywords="mental health", keywords="depression", keywords="internet", keywords="natural language processing", keywords="transformers", keywords="language models", keywords="sentiment", abstract="Background: Major depressive disorder is a common mental disorder affecting 5\% of adults worldwide. Early contact with health care services is critical for achieving an accurate diagnosis and improving patient outcomes. Key symptoms of major depressive disorder (depression hereafter), such as cognitive distortions, are observable in verbal communication and can also manifest in the structure of written language. Thus, the automatic analysis of text outputs may provide opportunities for early intervention in settings where written communication is rich and regular, such as social media and web-based forums. Objective: The objective of this study was 2-fold. We sought to gauge the effectiveness of different machine learning approaches to identify users of the mass web-based forum Reddit who eventually disclose a diagnosis of depression. We then aimed to determine whether the time between a forum post and a depression diagnosis date was a relevant factor in performing this detection. Methods: A total of 2 Reddit data sets containing posts belonging to users with and without a history of depression diagnosis were obtained. The intersection of these data sets yielded a set of users with an estimated date of depression diagnosis. This derived data set was used as an input for several machine learning classifiers, including transformer-based language models (LMs). Results: The Bidirectional Encoder Representations from Transformers (BERT) and MentalBERT transformer-based LMs proved the most effective in distinguishing forum users with a known depression diagnosis from those without. They each obtained a mean F1-score of 0.64 across the experimental setups used for binary classification. The results also suggested that the final 12 to 16 weeks (about 3-4 months) of posts before a depressed user's estimated diagnosis date are the most indicative of their illness, with earlier data adding little to detection accuracy. Furthermore, in the 4- to 8-week period before the user's estimated diagnosis date, their posts exhibited more negative sentiment than in any other 4-week period of their post history. Conclusions: Transformer-based LMs may be used on data from web-based social media forums to identify users at risk for psychiatric conditions such as depression. Language features picked up by these classifiers might predate depression onset by weeks to months, enabling proactive mental health care interventions to support those at risk for this condition. 
", doi="10.2196/41205", url="https://ai.jmir.org/2023/1/e41205", url="http://www.ncbi.nlm.nih.gov/pubmed/37525646" } @Article{info:doi/10.2196/40973, author="Jeyakumar, Tharshini and Younus, Sarah and Zhang, Melody and Clare, Megan and Charow, Rebecca and Karsan, Inaara and Dhalla, Azra and Al-Mouaswas, Dalia and Scandiffio, Jillian and Aling, Justin and Salhia, Mohammad and Lalani, Nadim and Overholt, Scott and Wiljer, David", title="Preparing for an Artificial Intelligence--Enabled Future: Patient Perspectives on Engagement and Health Care Professional Training for Adopting Artificial Intelligence Technologies in Health Care Settings", journal="JMIR AI", year="2023", month="Mar", day="2", volume="2", pages="e40973", keywords="artificial intelligence", keywords="patient", keywords="education", keywords="attitude", keywords="health data", keywords="adoption", keywords="health equity", keywords="patient engagement", abstract="Background: As new technologies emerge, there is a significant shift in the way care is delivered on a global scale. Artificial intelligence (AI) technologies have been rapidly and inexorably used to optimize patient outcomes, reduce health system costs, improve workflow efficiency, and enhance population health. Despite the widespread adoption of AI technologies, the literature on patient engagement and their perspectives on how AI will affect clinical care is scarce. Minimal patient engagement can limit the optimization of these novel technologies and contribute to suboptimal use in care settings. Objective: We aimed to explore patients' views on what skills they believe health care professionals should have in preparation for this AI-enabled future and how we can better engage patients when adopting and deploying AI technologies in health care settings. Methods: Semistructured interviews were conducted from August 2020 to December 2021 with 12 individuals who were a patient in any Canadian health care setting. Interviews were conducted until thematic saturation occurred. A thematic analysis approach outlined by Braun and Clarke was used to inductively analyze the data and identify overarching themes. Results: Among the 12 patients interviewed, 8 (67\%) were from urban settings and 4 (33\%) were from rural settings. A majority of the participants were very comfortable with technology (n=6, 50\%) and somewhat familiar with AI (n=7, 58\%). In total, 3 themes emerged: cultivating patients' trust, fostering patient engagement, and establishing data governance and validation of AI technologies. Conclusions: With the rapid surge of AI solutions, there is a critical need to understand patient values in advancing the quality of care and contributing to an equitable health system. Our study demonstrated that health care professionals play a synergetic role in the future of AI and digital technologies. Patient engagement is vital in addressing underlying health inequities and fostering an optimal care experience. Future research is warranted to understand and capture the diverse perspectives of patients with various racial, ethnic, and socioeconomic backgrounds. 
", doi="10.2196/40973", url="https://ai.jmir.org/2023/1/e40973", url="http://www.ncbi.nlm.nih.gov/pubmed/38875561" } @Article{info:doi/10.2196/42936, author="Berdahl, Thomas Carl and Baker, Lawrence and Mann, Sean and Osoba, Osonde and Girosi, Federico", title="Strategies to Improve the Impact of Artificial Intelligence on Health Equity: Scoping Review", journal="JMIR AI", year="2023", month="Feb", day="7", volume="2", pages="e42936", keywords="artificial intelligence", keywords="machine learning", keywords="health equity", keywords="health care disparities", keywords="algorithmic bias", keywords="social determinants of health", keywords="decision making", keywords="algorithms", keywords="gray literature", keywords="equity", keywords="health data", abstract="Background: Emerging artificial intelligence (AI) applications have the potential to improve health, but they may also perpetuate or exacerbate inequities. Objective: This review aims to provide a comprehensive overview of the health equity issues related to the use of AI applications and identify strategies proposed to address them. Methods: We searched PubMed, Web of Science, the IEEE (Institute of Electrical and Electronics Engineers) Xplore Digital Library, ProQuest U.S. Newsstream, Academic Search Complete, the Food and Drug Administration (FDA) website, and ClinicalTrials.gov to identify academic and gray literature related to AI and health equity that were published between 2014 and 2021 and additional literature related to AI and health equity during the COVID-19 pandemic from 2020 and 2021. Literature was eligible for inclusion in our review if it identified at least one equity issue and a corresponding strategy to address it. To organize and synthesize equity issues, we adopted a 4-step AI application framework: Background Context, Data Characteristics, Model Design, and Deployment. We then created a many-to-many mapping of the links between issues and strategies. Results: In 660 documents, we identified 18 equity issues and 15 strategies to address them. Equity issues related to Data Characteristics and Model Design were the most common. The most common strategies recommended to improve equity were improving the quantity and quality of data, evaluating the disparities introduced by an application, increasing model reporting and transparency, involving the broader community in AI application development, and improving governance. Conclusions: Stakeholders should review our many-to-many mapping of equity issues and strategies when planning, developing, and implementing AI applications in health care so that they can make appropriate plans to ensure equity for populations affected by their products. AI application developers should consider adopting equity-focused checklists, and regulators such as the FDA should consider requiring them. Given that our review was limited to documents published online, developers may have unpublished knowledge of additional issues and strategies that we were unable to identify. ", doi="10.2196/42936", url="https://ai.jmir.org/2023/1/e42936" } @Article{info:doi/10.2196/42940, author="Mashar, Meghavi and Chawla, Shreya and Chen, Fangyue and Lubwama, Baker and Patel, Kyle and Kelshiker, A. Mihir and Bachtiger, Patrik and Peters, S. 
Nicholas", title="Artificial Intelligence Algorithms in Health Care: Is the Current Food and Drug Administration Regulation Sufficient?", journal="JMIR AI", year="2023", month="Jan", day="16", volume="2", pages="e42940", keywords="artificial intelligence", keywords="machine learning", keywords="regulation", doi="10.2196/42940", url="https://ai.jmir.org/2023/1/e42940" } @Article{info:doi/10.2196/41940, author="Barry, Barbara and Zhu, Xuan and Behnken, Emma and Inselman, Jonathan and Schaepe, Karen and McCoy, Rozalina and Rushlow, David and Noseworthy, Peter and Richardson, Jordan and Curtis, Susan and Sharp, Richard and Misra, Artika and Akfaly, Abdulla and Molling, Paul and Bernard, Matthew and Yao, Xiaoxi", title="Provider Perspectives on Artificial Intelligence--Guided Screening for Low Ejection Fraction in Primary Care: Qualitative Study", journal="JMIR AI", year="2022", month="Oct", day="14", volume="1", number="1", pages="e41940", keywords="artificial intelligence", keywords="AI", keywords="machine learning", keywords="human-AI interaction", keywords="health informatics", keywords="primary care", keywords="cardiology", keywords="pragmatic clinical trial", keywords="AI-enabled clinical decision support", keywords="human-computer interaction", keywords="health care delivery", keywords="clinical decision support", keywords="health care", keywords="AI tools", abstract="Background: The promise of artificial intelligence (AI) to transform health care is threatened by a tangle of challenges that emerge as new AI tools are introduced into clinical practice. AI tools with high accuracy, especially those that detect asymptomatic cases, may be hindered by barriers to adoption. Understanding provider needs and concerns is critical to inform implementation strategies that improve provider buy-in and adoption of AI tools in medicine. Objective: This study aimed to describe provider perspectives on the adoption of an AI-enabled screening tool in primary care to inform effective integration and sustained use. Methods: A qualitative study was conducted between December 2019 and February 2020 as part of a pragmatic randomized controlled trial at a large academic medical center in the United States. In all, 29 primary care providers were purposively sampled using a positive deviance approach for participation in semistructured focus groups after their use of the AI tool in the randomized controlled trial was complete. Focus group data were analyzed using a grounded theory approach; iterative analysis was conducted to identify codes and themes, which were synthesized into findings. Results: Our findings revealed that providers understood the purpose and functionality of the AI tool and saw potential value for more accurate and faster diagnoses. However, successful adoption into routine patient care requires the smooth integration of the tool with clinical decision-making and existing workflow to address provider needs and preferences during implementation. To fulfill the AI tool's promise of clinical value, providers identified areas for improvement including integration with clinical decision-making, cost-effectiveness and resource allocation, provider training, workflow integration, care pathway coordination, and provider-patient communication. Conclusions: The implementation of AI-enabled tools in medicine can benefit from sensitivity to the nuanced context of care and provider needs to enable the useful adoption of AI tools at the point of care. 
Trial Registration: ClinicalTrials.gov NCT04000087; https://clinicaltrials.gov/ct2/show/NCT04000087 ", doi="10.2196/41940", url="https://ai.jmir.org/2022/1/e41940", url="http://www.ncbi.nlm.nih.gov/pubmed/38875550" }
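
Illustrative appendix (plain-text note outside the entries above, so BibTeX parsers ignore it). Several of the indexed studies (eg, info:doi/10.2196/43483 and info:doi/10.2196/41205) build on transformer-based encoders that map short texts to dense vectors and then compare those vectors. The minimal Python sketch below shows that embed-and-compare pattern using the open-source sentence-transformers library; the pretrained checkpoint (all-MiniLM-L6-v2), the example questions, and the 0.8 decision cutoff are illustrative assumptions, not the fine-tuned Clinical BERT or MentalBERT pipelines evaluated in the cited work.

# Minimal sketch: scoring the semantic similarity of question pairs with a
# pretrained sentence encoder (pip install sentence-transformers).
# The checkpoint, example questions, and cutoff are illustrative assumptions,
# not taken from the studies indexed above.
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("all-MiniLM-L6-v2")  # general-purpose encoder

pairs = [
    ("What is the approved storage temperature for this product?",
     "How should this medication be stored?"),
    ("What is the approved storage temperature for this product?",
     "What are the most common adverse events in adults?"),
]

for q1, q2 in pairs:
    # Encode both questions into dense vectors, then compare with cosine similarity.
    embeddings = model.encode([q1, q2], convert_to_tensor=True)
    score = util.cos_sim(embeddings[0], embeddings[1]).item()
    label = "similar" if score >= 0.8 else "different"  # arbitrary cutoff
    print(f"{score:.2f} {label}: {q1!r} vs {q2!r}")

In the cited studies, such encoders are fine-tuned on labeled question pairs (entailment vs not) or longitudinal user histories rather than thresholded on raw cosine similarity; the sketch only illustrates the shared embedding-and-comparison building block that those systems extend.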