<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR AI</journal-id><journal-id journal-id-type="publisher-id">ai</journal-id><journal-id journal-id-type="index">41</journal-id><journal-title>JMIR AI</journal-title><abbrev-journal-title>JMIR AI</abbrev-journal-title><issn pub-type="epub">2817-1705</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v4i1e70222</article-id><article-id pub-id-type="doi">10.2196/70222</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Using AI to Translate and Simplify Spanish Orthopedic Medical Text: Instrument Validation Study</article-title></title-group><contrib-group><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Andalib</surname><given-names>Saman</given-names></name><degrees>BS</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" corresp="yes" equal-contrib="yes"><name name-style="western"><surname>Spina</surname><given-names>Aidin</given-names></name><degrees>BS</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Picton</surname><given-names>Bryce</given-names></name><degrees>BS</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Solomon</surname><given-names>Sean S</given-names></name><degrees>BS</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Scolaro</surname><given-names>John A</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Nelson</surname><given-names>Ariana M</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib></contrib-group><aff id="aff1"><institution>UCI School of Medicine, University of California</institution><addr-line>1001 Health Sciences Rd</addr-line><addr-line>Irvine</addr-line><addr-line>CA</addr-line><country>United States</country></aff><aff id="aff2"><institution>Department of Orthopaedic Surgery, UC Irvine Health</institution><addr-line>Orange</addr-line><country>United States</country></aff><aff id="aff3"><institution>Department of Anesthesiology, UC Irvine Health</institution><addr-line>Orange</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Gardezi</surname><given-names>Sabiha</given-names></name></contrib><contrib contrib-type="editor"><name name-style="western"><surname>Yin</surname><given-names>Zhijun</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Zickler</surname><given-names>Christine</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Xie</surname><given-names>Yi</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Aidin Spina, BS, UCI School of Medicine, University of California, 1001 Health Sciences Rd, Irvine, CA, 92617, United States, 1 (949) 824-6119; <email>acspina@hs.uci.edu</email></corresp><fn fn-type="equal" id="equal-contrib1"><label>*</label><p>these 
authors contributed equally</p></fn></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>21</day><month>3</month><year>2025</year></pub-date><volume>4</volume><elocation-id>e70222</elocation-id><history><date date-type="received"><day>17</day><month>12</month><year>2024</year></date><date date-type="rev-recd"><day>06</day><month>02</month><year>2025</year></date><date date-type="accepted"><day>12</day><month>02</month><year>2025</year></date></history><copyright-statement>&#x00A9; Saman Andalib, Aidin Spina, Bryce Picton, Sean S Solomon, John A Scolaro, Ariana M Nelson. Originally published in JMIR AI (<ext-link ext-link-type="uri" xlink:href="https://ai.jmir.org">https://ai.jmir.org</ext-link>), 21.3.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR AI, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://www.ai.jmir.org/">https://www.ai.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://ai.jmir.org/2025/1/e70222"/><abstract><sec><title>Background</title><p>Language barriers contribute significantly to health care disparities in the United States, where a sizable proportion of patients are exclusively Spanish speakers. 
In orthopedic surgery, such barriers impact both patients&#x2019; comprehension of and patients&#x2019; engagement with available resources. Studies have explored the utility of large language models (LLMs) for medical translation but have yet to robustly evaluate artificial intelligence (AI)&#x2013;driven translation and simplification of orthopedic materials for Spanish speakers.</p></sec><sec><title>Objective</title><p>This study used the bilingual evaluation understudy (BLEU) method to assess translation quality and investigated the ability of AI to simplify patient education materials (PEMs) in Spanish.</p></sec><sec sec-type="methods"><title>Methods</title><p>PEMs (n=78) from the American Academy of Orthopaedic Surgery were translated from English to Spanish, using 2 AI tools (GPT-4, an LLM, and Google Translate, a machine translation service). The BLEU methodology was applied to compare AI translations with professionally human-translated PEMs. The Friedman test and Dunn multiple comparisons test were used to statistically quantify differences in translation quality. A readability analysis and feature analysis were subsequently performed to evaluate text simplification success and the impact of English text features on BLEU scores. The capability of an LLM to simplify medical language written in Spanish was also assessed.</p></sec><sec sec-type="results"><title>Results</title><p>As measured by BLEU scores, GPT-4 showed moderate success in translating PEMs into Spanish but was less successful than Google Translate. Simplified PEMs demonstrated improved readability when compared to original versions (<italic>P</italic>&#x003C;.001) but were unable to reach the targeted grade level for simplification. The feature analysis revealed that the total number of syllables and average number of syllables per sentence had the highest impact on BLEU scores. 
GPT-4 was able to significantly reduce the complexity of medical text written in Spanish (<italic>P</italic>&#x003C;.001).</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>Although Google Translate outperformed GPT-4 in translation accuracy, LLMs, such as GPT-4, may provide significant utility in translating medical texts into Spanish and simplifying such texts. We recommend considering a dual approach&#x2014;using Google Translate for translation and GPT-4 for simplification&#x2014;to improve medical information accessibility and orthopedic surgery education among Spanish-speaking patients.</p></sec></abstract><kwd-group><kwd>large language models</kwd><kwd>LLM</kwd><kwd>patient education</kwd><kwd>translation</kwd><kwd>bilingual evaluation understudy</kwd><kwd>GPT-4</kwd><kwd>Google Translate</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>It has been well documented that racial and ethnic minority patient groups in the United States endure substantial limitations in patient care [<xref ref-type="bibr" rid="ref1">1</xref>]. Specifically, significant disparities in health care outcomes between White populations and Hispanic populations persist in several overarching domains of medicine, including but not limited to rates of diabetes, hypertension, and insurance status [<xref ref-type="bibr" rid="ref2">2</xref>]. 
Moreover, previous research suggests that language barriers may be associated with larger lapses in perioperative process-of-care outcomes [<xref ref-type="bibr" rid="ref3">3</xref>], and patient populations who experience language barriers also face increased predisposition to hospital readmission and emergency department visits, further highlighting their susceptibility to undesired health care outcomes [<xref ref-type="bibr" rid="ref4">4</xref>].</p><p>In the field of orthopedic surgery, these disparities are broadly evident [<xref ref-type="bibr" rid="ref5">5</xref>-<xref ref-type="bibr" rid="ref7">7</xref>]. From initial access to orthopedic care to postoperative outcomes, Spanish-speaking patients contend with significant barriers in accessing high-quality care [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. Hispanic populations often have limitations in their ability to schedule appointments for orthopedic concerns and often do not pursue revision surgery in cases of nonoptimal outcomes after surgical intervention [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>]. During orthopedic clinic visits, more than half of Spanish-speaking patients have been asked to rely on nonqualified or ad hoc interpreters rather than professional services, indicating that this patient group faces limitations in access to clear and accurate information about orthopedic procedures and services [<xref ref-type="bibr" rid="ref9">9</xref>]. These disparities may interact and thereby have implications for patient-reported outcome measures (PROMs) for Spanish-speaking populations. Additionally, recent work has evaluated the suitability of PROMs for Spanish-speaking populations [<xref ref-type="bibr" rid="ref10">10</xref>]. Commonly used PROMs for Spanish-speaking patient groups were shown to be written at a reading level above the recommended complexity for patient populations in the United States. 
Technological advancements can provide avenues to address these concerns if they are implemented in a manner that is tailored to their intended patient populations [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>]. Thus, given the widespread documentation of disparities in orthopedic care that Spanish-speaking patients endure, further evaluation of how emerging technologies can address these lapses is extremely important.</p><p>Artificial intelligence (AI) has provided unique solutions to problems in health care, including those related to graduate medical education and patients&#x2019; comprehension of medical text [<xref ref-type="bibr" rid="ref13">13</xref>-<xref ref-type="bibr" rid="ref17">17</xref>]. Recent work has turned to using publicly available large language models (LLMs) to translate patient discharge summaries and frequently asked questions. The utility of these tools in translating medical text has been illustrated in qualitative textual evaluations conducted via human grading [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>]. However, studies have yet to evaluate AI-enabled textual translation through robust quantitative analysis involving bilingual evaluation understudy (BLEU) analysis [<xref ref-type="bibr" rid="ref20">20</xref>]. This methodology quantitatively rates machine-translated text against human translation and has been used in clinical studies [<xref ref-type="bibr" rid="ref21">21</xref>-<xref ref-type="bibr" rid="ref23">23</xref>]. Additionally, no study has evaluated AI-driven simplification of Spanish medical text, although AI-driven simplification is a functionality that our group previously quantitatively evaluated for English medical text [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref25">25</xref>].</p><p>The goals of this study were twofold. 
First, we aimed to conduct a robust quantitative evaluation of machine translations of medical text by using BLEU analysis, and second, we aimed to assess whether AI platforms can be used to simplify orthopedic medical text written in Spanish.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Study Design</title><p>A total of 78 patient education materials (PEMs) from the American Academy of Orthopaedic Surgery (AAOS) were translated from English into Spanish, using 4 different GPT-4 input prompts via the application programming interface (prompts 1&#x2010;4; <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) [<xref ref-type="bibr" rid="ref26">26</xref>] and Google Translate via the googletrans package (SuHun Han). Each machine-generated translation was compared to the professionally human-translated reference from the AAOS, using BLEU analysis via the Natural Language Toolkit (NLTK) [<xref ref-type="bibr" rid="ref27">27</xref>]; BLEU scores range from 0 to 1, with scores of &#x2265;0.5 indicating high similarity to a designated reference text. A Friedman test, followed by a Dunn multiple comparisons test, was performed for each BLEU score to quantify differences in translation quality. Unigram, bigram, trigram, and fourgram precision analyses were conducted to further assess the translation quality. A Friedman test, followed by a Dunn multiple comparisons test, was likewise performed for each precision metric.</p><p>To assess the simplification of the PEMs, we compared the readability of translations generated by GPT-4&#x2019;s prompt 1 and that of the original AAOS Spanish versions before and after simplification. Spanish text was simplified by using a standardized prompt that was validated for medical use cases [<xref ref-type="bibr" rid="ref16">16</xref>]. Text complexity was analyzed by counting sentences, words, and syllables with custom functions and the NLTK library [<xref ref-type="bibr" rid="ref27">27</xref>]. 
Readability was evaluated by using the Fern&#x00E1;ndez-Huerta readability formula (FH = 206.84 &#x2212; [0.60 &#x00D7; P] &#x2212; [1.02 &#x00D7; F]; FH: reading ease score; P: average number of syllables per 100 words; F: average number of sentences per 100 words) [<xref ref-type="bibr" rid="ref28">28</xref>] and the INFLESZ readability formula (INFLESZ = 206.835 &#x2212; [62.3 &#x00D7; S/P] &#x2212; [P/F]; S: total number of syllables; P: total number of words; F: total number of sentences) [<xref ref-type="bibr" rid="ref29">29</xref>]. The Wilcoxon matched-pairs signed rank test was applied to compare the original and simplified versions, and the Spearman correlation coefficient was used to measure the strength of the association between the simplification process and improved readability.</p><p>To assess the impact of original English text features on translation quality, a feature analysis was performed. Random forest regression was completed, using 4 input features (number of words, average number of words per sentence, total number of syllables, and average number of syllables per sentence) of the original English PEM, to predict 20 distinct BLEU scores. These scores encompassed the 4 BLEU scoring methods (BLEU 1 to BLEU 4) for each of the 5 translation methods (Google Translate and the 4 different GPT-4 input prompts). A 5-fold cross-validation was used to minimize overfitting of the data and to ensure robust feature importance calculations. Average importance scores across all folds were calculated to assess the contribution of each feature to translation performance.</p></sec><sec id="s2-2"><title>Ethical Considerations</title><p>No application was submitted for review board assessment because no human or animal participants were involved directly or indirectly in this study. The University of California, Irvine Institutional Review Board does not require assessment of studies that do not directly or indirectly involve human or animal participants. 
This study consisted solely of a quantitative evaluation of machine translations and was hence exempt from any institutional review.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>BLEU Analysis</title><p>BLEU 1 scores (<xref ref-type="fig" rid="figure1">Figure 1A</xref>) revealed a statistically significant difference between Google Translate and each prompt (prompt 1: rank sum difference=63.00; <italic>P</italic>=.01; prompt 2: rank sum difference=81.00; <italic>P</italic>&#x003C;.001; prompt 3: rank sum difference=65.00; <italic>P</italic>=.01; prompt 4: rank sum difference=71.00; <italic>P</italic>=.003). No significant differences were observed among the 4 GPT prompts (all <italic>P</italic> values were &#x003E;.05). For BLEU 1, Google Translate had the highest rank sum (290.0), while prompt 2 had the lowest (209.0). Prompt 1 had a rank sum of 227.0, while prompts 3 and 4 had rank sums of 225.0 and 219.0, respectively.</p><p>For BLEU 2 scores (<xref ref-type="fig" rid="figure1">Figure 1B</xref>), a similar trend was observed, with significant differences between Google Translate and prompts 1, 2, 3, and 4. The rank sum difference was 76.00 between Google Translate and prompt 1 (<italic>P</italic>&#x003C;.001), 79.00 between prompt 2 and Google Translate (<italic>P</italic>&#x003C;.001), 73.00 between prompt 3 and Google Translate (<italic>P</italic>=.002), and 77.00 between prompt 4 and Google Translate (<italic>P</italic>&#x003C;.001). Again, no statistically significant differences were found between the 4 GPT prompts (all <italic>P</italic> values were &#x003E;.05). The rank sum for Google Translate was the highest (295.0), followed by those for prompt 3 (222.0), prompt 1 (219.0), and prompt 4 (218.0). 
Prompt 2 had the lowest rank sum (216.0).</p><p>For the BLEU 3 scores (<xref ref-type="fig" rid="figure1">Figure 1C</xref>), the Dunn test also showed significant differences between Google Translate and each prompt (prompt 1: rank sum difference=72.00; <italic>P</italic>=.003; prompt 2: rank sum difference=85.00; <italic>P</italic>&#x003C;.001; prompt 3: rank sum difference=76.00; <italic>P</italic>=.001; prompt 4: rank sum difference=82.00; <italic>P</italic>&#x003C;.001). No significant differences were found between the 4 GPT prompts (all <italic>P</italic> values were &#x003E;.05). The rank sums were as follows: 297.0 for Google Translate, 225.0 for prompt 1, 212.0 for prompt 2, 221.0 for prompt 3, and 215.0 for prompt 4.</p><p>Finally, BLEU 4 scores (<xref ref-type="fig" rid="figure1">Figure 1D</xref>) followed the same pattern as the BLEU scores in all 3 prior BLEU analyses, as the Dunn test revealed significant differences between Google Translate and each prompt (prompt 1: rank sum difference=74.00; <italic>P</italic>=.002; prompt 2: rank sum difference=77.00; <italic>P</italic>&#x003C;.001; prompt 3: rank sum difference=72.00; <italic>P</italic>=.003; prompt 4: rank sum difference=82.00; <italic>P</italic>&#x003C;.001). Google Translate had the highest rank sum (295.0), followed by prompt 3 (223.0), prompt 1 (221.0), and prompt 2 (218.0). Prompt 4 had the lowest rank sum (213.0).</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>BLEU scores for Google Translate and 4 GPT-4 input prompts (prompts 1-4). Box plots display the BLEU 1 (<bold>A</bold>), BLEU 2 (<bold>B</bold>), BLEU 3 (<bold>C</bold>), and BLEU 4 (<bold>D</bold>) scores for translations generated by Google Translate and the 4 different GPT-4 input prompts. 
BLEU: bilingual evaluation understudy.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v4i1e70222_fig01.png"/></fig></sec><sec id="s3-2"><title>N-Gram Precision Analysis</title><p>The unigram precision analysis (<xref ref-type="fig" rid="figure2">Figure 2A</xref>) revealed significant differences between Google Translate and prompts 1, 2, 3, and 4. The rank sum difference was 71.50 between Google Translate and prompt 1 (<italic>P</italic>=.003), 64.00 between prompt 2 and Google Translate (<italic>P</italic>=.01), 55.50 between prompt 3 and Google Translate (<italic>P</italic>=.05), and 74.00 between prompt 4 and Google Translate (<italic>P</italic>=.002). Google Translate had the highest rank sum (287.0), followed by prompt 3 (231.5), prompt 2 (223.0), and prompt 1 (215.5). Prompt 4 had the lowest rank sum (213.0).</p><p>The bigram precision analysis (<xref ref-type="fig" rid="figure2">Figure 2B</xref>) also revealed significant rank sum differences between Google Translate and each prompt (prompt 1: rank sum difference=93.00; <italic>P</italic>&#x003C;.001; prompt 2: rank sum difference=88.50; <italic>P</italic>&#x003C;.001; prompt 3: rank sum difference=79.50; <italic>P</italic>&#x003C;.001; prompt 4: rank sum difference=99.00; <italic>P</italic>&#x003C;.001). Google Translate had the highest rank sum (306.0), followed by prompt 3 (226.5). 
Prompt 2 followed with a rank sum of 217.5, and prompts 1 and 4 had rank sums of 213.0 and 207.0, respectively.</p><p>For the trigram precision analysis (<xref ref-type="fig" rid="figure2">Figure 2C</xref>), the Dunn test revealed a pattern that was slightly different from the previously established pattern, with significant differences between Google Translate and prompt 1 (rank sum difference=80.00; <italic>P</italic>&#x003C;.001), between Google Translate and prompt 2 (rank sum difference=73.00; <italic>P</italic>=.002), and between Google Translate and prompt 4 (rank sum difference=74.00; <italic>P</italic>=.002). There was no significant difference in trigram precision between Google Translate and prompt 3 (<italic>P</italic>=.07). Google Translate had the highest rank sum (290.0), followed by prompt 3 (237.0). Prompt 2 had a rank sum of 217.0, while prompt 4 had a rank sum of 216.0. The lowest rank sum for trigram precision was recorded for prompt 1 (210.0).</p><p>The fourgram precision analysis (<xref ref-type="fig" rid="figure2">Figure 2D</xref>) showed the same pattern of significance as that in the trigram analysis, with significant differences between Google Translate and GPT prompts 1, 2, and 4. The rank sum difference between Google Translate and prompt 1 was 71.00 (<italic>P</italic>=.003). The rank sum differences between Google Translate and prompt 2 and between Google Translate and prompt 4 were 72.00 (<italic>P</italic>=.003) and 78.00 (<italic>P</italic>&#x003C;.001), respectively. Fourgram precision showed no statistically significant difference between Google Translate and prompt 3 (<italic>P</italic>=.06). Google Translate had the highest rank sum (289.0), while prompt 3 ranked second with a rank sum of 235.0. Prompt 1 had a rank sum of 218.0, and prompt 2 closely followed with a rank sum of 217.0. 
Prompt 4 had the lowest rank sum (211.0).</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>N-gram precision for Google Translate and 4 GPT-4 input prompts (prompts 1-4). Box plots display unigram (<bold>A</bold>), bigram (<bold>B</bold>), trigram (<bold>C</bold>), and fourgram (<bold>D</bold>) precision scores for translations generated by Google Translate and the 4 different GPT-4 input prompts.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v4i1e70222_fig02.png"/></fig></sec><sec id="s3-3"><title>Simplification Analysis</title><p>As measured by the Fern&#x00E1;ndez-Huerta scores, the simplified prompt 1 PEM translations and simplified AAOS Spanish PEMs demonstrated significant improvements in readability when compared to the original translations (<xref ref-type="fig" rid="figure3">Figure 3</xref>). The Wilcoxon (W) test for prompt 1 showed a significant difference between the original and simplified translations, with a W value of 3059 (<italic>P</italic>&#x003C;.001); the median difference was 7.846, and the Spearman correlation coefficient was 0.6459 (<italic>P</italic>&#x003C;.001). For the AAOS Spanish version, the Wilcoxon test revealed a significant improvement after simplification, with a W value of 3055 (<italic>P</italic>&#x003C;.001) and a median difference of 5.807; the Spearman correlation coefficient was 0.6731 (<italic>P</italic>&#x003C;.001).</p><p>For the INFLESZ scores, similar results were observed. For prompt 1, the Wilcoxon matched-pairs signed rank test indicated a significant difference between the original and simplified translations, with a W value of 3058 (<italic>P</italic>&#x003C;.001); the median difference was 7.830, and the Spearman correlation coefficient was 0.6591 (<italic>P</italic>&#x003C;.001). 
For the AAOS Spanish PEMs, the Wilcoxon test showed a significant improvement after simplification, with a W value of 3045 (<italic>P</italic>&#x003C;.001) and a median difference of 5.887; the Spearman correlation coefficient was 0.6926 (<italic>P</italic>&#x003C;.001).</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Fern&#x00E1;ndez-Huerta and INFLESZ scores for the original translations by prompt 1 and the AAOS and for their simplified versions. Box plots display the Fern&#x00E1;ndez-Huerta readability scores (<bold>A and B</bold>) and INFLESZ readability scores (<bold>C and D</bold>) for the original and simplified versions of the PEMs generated by GPT-4&#x2019;s prompt 1 (<bold>A and C</bold>) and for the original and simplified AAOS translations (<bold>B and D</bold>). AAOS: American Academy of Orthopaedic Surgery; PEM: patient education material.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v4i1e70222_fig03.png"/></fig></sec><sec id="s3-4"><title>Feature Analysis</title><p>The feature importance analysis of the original English text features revealed that the total number of syllables was the most influential predictor of BLEU scores across Google Translate and GPT-4 prompts, serving as the most important feature (ie, input variable) in every iteration, with scores ranging from 0.27 to 0.35 (<xref ref-type="fig" rid="figure4">Figure 4</xref>). The feature importance range for the number of words was 0.20 to 0.23, that for the average number of words per sentence was 0.19 to 0.27, and that for the average number of syllables per sentence was 0.22 to 0.27. 
Overall, syllable-based features, particularly the total number of syllables, served as the highest-importance features in determining BLEU scores across all translation methods.</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Feature importance scores of English text characteristics for predicting BLEU scores. The heat map shows the relative importance of 4 input features&#x2014;number of words, average number of words per sentence, total number of syllables, and average number of syllables per sentence&#x2014;in predicting BLEU scores across the 4 BLEU analyses for each of the 5 translation methods. Darker colors represent higher feature importance. avg: average; BLEU: bilingual evaluation understudy; num: number.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="ai_v4i1e70222_fig04.png"/></fig></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Context</title><p>Disparities in communication with Spanish-speaking populations can negatively affect patient education and subsequent outcomes in the field of orthopedic surgery [<xref ref-type="bibr" rid="ref5">5</xref>-<xref ref-type="bibr" rid="ref7">7</xref>]. Accurate translation of medical text is one component of properly educating Spanish-speaking patient populations about orthopedic conditions. For orthopedic surgeons, it is vital to ensure that Spanish-speaking patients are properly informed about their conditions and opportunities for surgery, given their increased propensity for hospital readmission, complications, and negative outlooks on surgical intervention [<xref ref-type="bibr" rid="ref6">6</xref>-<xref ref-type="bibr" rid="ref8">8</xref>]. Previous work provided a foundation for quantitatively evaluating AI-based medical text translation; however, no study has used BLEU methodology to provide a robust, machine learning&#x2013;based evaluation of translation success. 
Additionally, no study has evaluated the AI-enabled simplification of Spanish text. Given the recently outlined need for simplified Spanish text among Spanish-speaking patient populations, this is a pressing need in the field [<xref ref-type="bibr" rid="ref10">10</xref>]. Our study used a robust corpus of patient-facing orthopedic medical text that included language from across various subspecialties and topics of orthopedic surgery, including the spine, hip, knee, and upper extremities, among others. Through analyzing the success of openly accessible LLMs in translating such text, we aimed to comprehensively assess the translation options available for orthopedic practice.</p></sec><sec id="s4-2"><title>Translation Success</title><p>This study demonstrated that LLMs, such as ChatGPT, can translate orthopedic PEMs with moderate success, as quantified through BLEU analysis. By experimenting with 4 different model prompts, we explored whether prompt optimization could enhance translation effectiveness. Our findings suggest that while prompt optimization can improve translation outcomes, Google Translate generally provides superior translation quality when compared to human-translated benchmarks. This superior performance highlights the potential of Google Translate for rapid translation tasks, such as translating patient directives in discharge summaries and other patient-facing documents. However, despite its prevalent use, Google Translate&#x2019;s limitations underscore the need for alternative translation solutions [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>]. The feature analysis conducted within our study also revealed that the syllable complexity of the original English text is a critical predictor of successful translation for both Google Translate and ChatGPT, indicating areas for further refinement in translation approaches. 
An example AI translation, along with the original English and Spanish versions of the same PEM, can be found in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p></sec><sec id="s4-3"><title>Simplification Success</title><p>We also assessed the capability of ChatGPT in simplifying medical texts written in Spanish, using a standardized simplification prompting structure that was previously evaluated by our group. Although the platform was able to simplify the text, it did not achieve the targeted grade level specified in our prompts. This limitation aligns with prior studies that highlighted challenges in simplifying English medical texts [<xref ref-type="bibr" rid="ref16">16</xref>]. However, despite existing challenges with the precision of AI-simplified text in meeting prespecified grade levels, the ability of ChatGPT to simplify texts could greatly benefit Spanish-speaking patients, given that no alternative exists to aid patient comprehension in this way. This is of great importance, considering the complexity of the PROMs and other tools used to assess the operative success of orthopedic procedures in this patient group [<xref ref-type="bibr" rid="ref10">10</xref>]. Further studies should elucidate ways to best optimize the simplification of Spanish texts via AI platforms.</p></sec><sec id="s4-4"><title>Recommendations</title><p>Based on our results, we offer several recommendations for orthopedic surgeons. Although Google Translate remains a superior tool for translating English to Spanish due to its adherence to human translation quality, LLMs, such as ChatGPT, also show moderate success and can be considered for specific use cases. Importantly, ChatGPT&#x2019;s ability to simplify Spanish texts makes it a valuable tool for enhancing patient comprehension and engagement, particularly when translation by a native Spanish speaker is not feasible. 
We recommend using ChatGPT as an adjunct tool for both translating and simplifying medical texts. Surgeons should continue to use Google Translate for straightforward translations, but they should also consider leveraging ChatGPT&#x2019;s simplification capabilities to improve the accessibility of medical information. Further research into simplification methodologies is essential for optimizing PROMs and ultimately enhancing patient satisfaction following surgical care. We believe that this technology, once it is fully optimized and vetted, will have the potential to be incorporated into the electronic health record to aid in medical record management through textual translation of records for patients.</p></sec><sec id="s4-5"><title>Limitations</title><p>This study, while providing insights into the potential of LLMs for translating and simplifying medical texts, has several limitations. First, this study assessed existing models, only tested English-to-Spanish translations, and used a relatively small amount of content, thereby limiting the generalizability of our findings. Second, the BLEU metric, which we used to evaluate translation accuracy, primarily measures literal translation and may not fully capture semantic equivalence, which is critical in medical contexts. Future research could benefit from incorporating additional evaluations that involve human assessment to provide a more nuanced analysis. Third, this study&#x2019;s focus was on technical performance; we did not directly measure the impact on patient outcomes, such as comprehension, adherence, and satisfaction. Future studies should aim to link the quality of translations and simplifications to specific patient-centered outcomes. Clinical studies would provide valuable insights into the way that Spanish-speaking patient populations interact with and subsequently benefit from AI-enhanced PEMs, such as those analyzed in this study. 
Lastly, although the corpus of 78 PEMs covered a broad scope of orthopedic literature from all subspecialties, the results of this study only reflect the language used in standard orthopedic practice. Future studies should aim to replicate our results in other medical specialties to provide a broad understanding of the capabilities of AI in translation and simplification.</p></sec><sec id="s4-6"><title>Conclusions</title><p>This study highlights the utility and limitations of AI-driven tools in translating and simplifying medical texts for Spanish-speaking orthopedic patients. Our findings indicate that while Google Translate provides superior accuracy in translating medical texts, LLMs, such as ChatGPT, demonstrate moderate success and offer significant benefits in simplifying complex medical information into more comprehensible formats. Our recommended dual approach&#x2014;leveraging Google Translate for accuracy and ChatGPT for simplification&#x2014;presents a practical solution for enhancing patient education and engagement. Such advancements underscore the potential of AI to bridge the language gap in health care and thereby improve treatment outcomes. 
Future research should continue to refine these AI tools and enhance their precision and accessibility to meet the diverse needs of patient populations, thereby ensuring that all patients receive care that is both understandable and culturally competent.</p></sec></sec></body><back><fn-group><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AAOS</term><def><p>American Academy of Orthopaedic Surgeons</p></def></def-item><def-item><term id="abb2">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb3">BLEU</term><def><p>bilingual evaluation understudy</p></def></def-item><def-item><term id="abb4">LLM</term><def><p>large language model</p></def></def-item><def-item><term id="abb5">NLTK</term><def><p>Natural Language Toolkit</p></def></def-item><def-item><term id="abb6">PEM</term><def><p>patient education material</p></def></def-item><def-item><term id="abb7">PROM</term><def><p>patient-reported outcome measure</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Woloshin</surname><given-names>S</given-names> </name><name name-style="western"><surname>Bickell</surname><given-names>NA</given-names> </name><name name-style="western"><surname>Schwartz</surname><given-names>LM</given-names> </name><name name-style="western"><surname>Gany</surname><given-names>F</given-names> </name><name name-style="western"><surname>Welch</surname><given-names>HG</given-names> </name></person-group><article-title>Language barriers in medicine in the United States</article-title><source>JAMA</source><year>1995</year><month>03</month><day>1</day><volume>273</volume><issue>9</issue><fpage>724</fpage><lpage>728</lpage><pub-id pub-id-type="medline">7853631</pub-id></nlm-citation></ref><ref 
id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Odlum</surname><given-names>M</given-names> </name><name name-style="western"><surname>Moise</surname><given-names>N</given-names> </name><name name-style="western"><surname>Kronish</surname><given-names>IM</given-names> </name><etal/></person-group><article-title>Trends in poor health indicators among Black and Hispanic middle-aged and older adults in the United States, 1999-2018</article-title><source>JAMA Netw Open</source><year>2020</year><month>11</month><day>2</day><volume>3</volume><issue>11</issue><fpage>e2025134</fpage><pub-id pub-id-type="doi">10.1001/jamanetworkopen.2020.25134</pub-id><pub-id pub-id-type="medline">33175177</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Joo</surname><given-names>H</given-names> </name><name name-style="western"><surname>Fern&#x00E1;ndez</surname><given-names>A</given-names> </name><name name-style="western"><surname>Wick</surname><given-names>EC</given-names> </name><name name-style="western"><surname>Moreno Lepe</surname><given-names>G</given-names> </name><name name-style="western"><surname>Manuel</surname><given-names>SP</given-names> </name></person-group><article-title>Association of language barriers with perioperative and surgical outcomes: a systematic review</article-title><source>JAMA Netw Open</source><year>2023</year><month>07</month><day>3</day><volume>6</volume><issue>7</issue><fpage>e2322743</fpage><pub-id pub-id-type="doi">10.1001/jamanetworkopen.2023.22743</pub-id><pub-id pub-id-type="medline">37432686</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chu</surname><given-names>JN</given-names> </name><name 
name-style="western"><surname>Wong</surname><given-names>J</given-names> </name><name name-style="western"><surname>Bardach</surname><given-names>NS</given-names> </name><etal/></person-group><article-title>Association between language discordance and unplanned hospital readmissions or emergency department revisits: a systematic review and meta-analysis</article-title><source>BMJ Qual Saf</source><year>2024</year><month>06</month><day>19</day><volume>33</volume><issue>7</issue><fpage>456</fpage><lpage>469</lpage><pub-id pub-id-type="doi">10.1136/bmjqs-2023-016295</pub-id><pub-id pub-id-type="medline">38160059</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Busigo Torres</surname><given-names>R</given-names> </name><name name-style="western"><surname>Yendluri</surname><given-names>A</given-names> </name><name name-style="western"><surname>Stern</surname><given-names>BZ</given-names> </name><etal/></person-group><article-title>Is limited English proficiency associated with differences in care processes and treatment outcomes in patients undergoing orthopaedic surgery? 
A systematic review</article-title><source>Clin Orthop Relat Res</source><year>2024</year><month>08</month><day>1</day><volume>482</volume><issue>8</issue><fpage>1374</fpage><lpage>1390</lpage><pub-id pub-id-type="doi">10.1097/CORR.0000000000003034</pub-id><pub-id pub-id-type="medline">39031039</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Azua</surname><given-names>E</given-names> </name><name name-style="western"><surname>Fortier</surname><given-names>LM</given-names> </name><name name-style="western"><surname>Carroll</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Spanish-speaking patients have limited access scheduling outpatient orthopaedic appointments compared with English-speaking patients across the United States</article-title><source>Arthrosc Sports Med Rehabil</source><year>2023</year><month>02</month><day>26</day><volume>5</volume><issue>2</issue><fpage>e465</fpage><lpage>e471</lpage><pub-id pub-id-type="doi">10.1016/j.asmr.2023.01.015</pub-id><pub-id pub-id-type="medline">37101862</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Aggarwal</surname><given-names>A</given-names> </name><name name-style="western"><surname>Naylor</surname><given-names>JM</given-names> </name><name name-style="western"><surname>Adie</surname><given-names>S</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>VK</given-names> </name><name name-style="western"><surname>Harris</surname><given-names>IA</given-names> </name></person-group><article-title>Preoperative factors and patient-reported outcomes after total hip arthroplasty: multivariable prediction modeling</article-title><source>J 
Arthroplasty</source><year>2022</year><month>04</month><volume>37</volume><issue>4</issue><fpage>714</fpage><lpage>720.e4</lpage><pub-id pub-id-type="doi">10.1016/j.arth.2021.12.036</pub-id><pub-id pub-id-type="medline">34990754</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nguyen</surname><given-names>KH</given-names> </name><name name-style="western"><surname>Suarez</surname><given-names>P</given-names> </name><name name-style="western"><surname>Sales</surname><given-names>C</given-names> </name><name name-style="western"><surname>Fernandez</surname><given-names>A</given-names> </name><name name-style="western"><surname>Ward</surname><given-names>DT</given-names> </name><name name-style="western"><surname>Manuel</surname><given-names>SP</given-names> </name></person-group><article-title>Patients who have limited English proficiency have decreased utilization of revision surgeries after hip and knee arthroplasty</article-title><source>J Arthroplasty</source><year>2023</year><month>08</month><volume>38</volume><issue>8</issue><fpage>1429</fpage><lpage>1433</lpage><pub-id pub-id-type="doi">10.1016/j.arth.2023.02.024</pub-id><pub-id pub-id-type="medline">36805120</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Greene</surname><given-names>NE</given-names> </name><name name-style="western"><surname>Fuentes-Ju&#x00E1;rez</surname><given-names>BN</given-names> </name><name name-style="western"><surname>Sabatini</surname><given-names>CS</given-names> </name></person-group><article-title>Access to orthopaedic care for Spanish-speaking patients in California</article-title><source>J Bone Joint Surg Am</source><year>2019</year><month>09</month><day>18</day><volume>101</volume><issue>18</issue><fpage>e95</fpage><pub-id 
pub-id-type="doi">10.2106/JBJS.18.01080</pub-id><pub-id pub-id-type="medline">31567810</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Garavito</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Rodarte</surname><given-names>P</given-names> </name><name name-style="western"><surname>Navarro</surname><given-names>RA</given-names> </name></person-group><article-title>Readability analysis of Spanish-language patient-reported outcome measures in orthopaedic surgery</article-title><source>J Bone Joint Surg Am</source><year>2024</year><month>10</month><day>16</day><volume>106</volume><issue>20</issue><fpage>1934</fpage><lpage>1942</lpage><pub-id pub-id-type="doi">10.2106/JBJS.23.01367</pub-id><pub-id pub-id-type="medline">38781322</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cook</surname><given-names>DJ</given-names> </name><name name-style="western"><surname>Moradkhani</surname><given-names>A</given-names> </name><name name-style="western"><surname>Douglas</surname><given-names>KSV</given-names> </name><name name-style="western"><surname>Prinsen</surname><given-names>SK</given-names> </name><name name-style="western"><surname>Fischer</surname><given-names>EN</given-names> </name><name name-style="western"><surname>Schroeder</surname><given-names>DR</given-names> </name></person-group><article-title>Patient education self-management during surgical recovery: combining mobile (iPad) and a content management system</article-title><source>Telemed J E Health</source><year>2014</year><month>04</month><volume>20</volume><issue>4</issue><fpage>312</fpage><lpage>317</lpage><pub-id pub-id-type="doi">10.1089/tmj.2013.0219</pub-id><pub-id 
pub-id-type="medline">24443928</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cohen</surname><given-names>SM</given-names> </name><name name-style="western"><surname>Baimas-George</surname><given-names>M</given-names> </name><name name-style="western"><surname>Ponce</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Is a picture worth a thousand words? A scoping review of the impact of visual aids on patients undergoing surgery</article-title><source>J Surg Educ</source><year>2024</year><month>09</month><volume>81</volume><issue>9</issue><fpage>1276</fpage><lpage>1292</lpage><pub-id pub-id-type="doi">10.1016/j.jsurg.2024.06.002</pub-id><pub-id pub-id-type="medline">38955659</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>J</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>C</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>S</given-names> </name></person-group><article-title>Utility of ChatGPT in clinical practice</article-title><source>J Med Internet Res</source><year>2023</year><month>06</month><day>28</day><volume>25</volume><fpage>e48568</fpage><pub-id pub-id-type="doi">10.2196/48568</pub-id><pub-id pub-id-type="medline">37379067</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Massey</surname><given-names>PA</given-names> </name><name name-style="western"><surname>Montgomery</surname><given-names>C</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>AS</given-names> </name></person-group><article-title>Comparison of 
ChatGPT-3.5, ChatGPT-4, and orthopaedic resident performance on orthopaedic assessment examinations</article-title><source>J Am Acad Orthop Surg</source><year>2023</year><month>12</month><day>1</day><volume>31</volume><issue>23</issue><fpage>1173</fpage><lpage>1179</lpage><pub-id pub-id-type="doi">10.5435/JAAOS-D-23-00396</pub-id><pub-id pub-id-type="medline">37671415</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Scherr</surname><given-names>R</given-names> </name><name name-style="western"><surname>Halaseh</surname><given-names>FF</given-names> </name><name name-style="western"><surname>Spina</surname><given-names>A</given-names> </name><name name-style="western"><surname>Andalib</surname><given-names>S</given-names> </name><name name-style="western"><surname>Rivera</surname><given-names>R</given-names> </name></person-group><article-title>ChatGPT interactive medical simulations for early clinical education: case study</article-title><source>JMIR Med Educ</source><year>2023</year><month>11</month><day>10</day><volume>9</volume><fpage>e49877</fpage><pub-id pub-id-type="doi">10.2196/49877</pub-id><pub-id pub-id-type="medline">37948112</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Spina</surname><given-names>A</given-names> </name><name name-style="western"><surname>Andalib</surname><given-names>S</given-names> </name><name name-style="western"><surname>Flores</surname><given-names>D</given-names> </name><name name-style="western"><surname>Vermani</surname><given-names>R</given-names> </name><name name-style="western"><surname>Halaseh</surname><given-names>FF</given-names> </name><name name-style="western"><surname>Nelson</surname><given-names>AM</given-names> </name></person-group><article-title>Evaluation of 
generative language models in personalizing medical information: instrument validation study</article-title><source>JMIR AI</source><year>2024</year><month>08</month><day>13</day><volume>3</volume><fpage>e54371</fpage><pub-id pub-id-type="doi">10.2196/54371</pub-id><pub-id pub-id-type="medline">39137416</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Picton</surname><given-names>B</given-names> </name><name name-style="western"><surname>Andalib</surname><given-names>S</given-names> </name><name name-style="western"><surname>Spina</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Assessing AI simplification of medical texts: readability and content fidelity</article-title><source>Int J Med Inform</source><year>2025</year><month>03</month><volume>195</volume><fpage>105743</fpage><pub-id pub-id-type="doi">10.1016/j.ijmedinf.2024.105743</pub-id><pub-id pub-id-type="medline">39667051</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Garcia Valencia</surname><given-names>OA</given-names> </name><name name-style="western"><surname>Thongprayoon</surname><given-names>C</given-names> </name><name name-style="western"><surname>Jadlowiec</surname><given-names>CC</given-names> </name><etal/></person-group><article-title>AI-driven translations for kidney transplant equity in Hispanic populations</article-title><source>Sci Rep</source><year>2024</year><month>04</month><day>12</day><volume>14</volume><issue>1</issue><fpage>8511</fpage><pub-id pub-id-type="doi">10.1038/s41598-024-59237-7</pub-id><pub-id pub-id-type="medline">38609476</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Brewster</surname><given-names>RCL</given-names> </name><name name-style="western"><surname>Gonzalez</surname><given-names>P</given-names> </name><name name-style="western"><surname>Khazanchi</surname><given-names>R</given-names> </name><etal/></person-group><article-title>Performance of ChatGPT and Google Translate for pediatric discharge instruction translation</article-title><source>Pediatrics</source><year>2024</year><month>07</month><day>1</day><volume>154</volume><issue>1</issue><fpage>e2023065573</fpage><pub-id pub-id-type="doi">10.1542/peds.2023-065573</pub-id><pub-id pub-id-type="medline">38860299</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Papineni</surname><given-names>K</given-names> </name><name name-style="western"><surname>Roukos</surname><given-names>S</given-names> </name><name name-style="western"><surname>Ward</surname><given-names>T</given-names> </name><name name-style="western"><surname>Zhu</surname><given-names>WJ</given-names> </name></person-group><article-title>BLEU: a method for automatic evaluation of machine translation</article-title><conf-name>40th Annual Meeting of the Association for Computational Linguistics</conf-name><conf-date>Jul 7-12, 2002</conf-date><conf-loc>Philadelphia, Pennsylvania</conf-loc><pub-id pub-id-type="doi">10.3115/1073083.1073135</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hasani</surname><given-names>AM</given-names> </name><name name-style="western"><surname>Singh</surname><given-names>S</given-names> </name><name name-style="western"><surname>Zahergivar</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Evaluating the performance of Generative Pre-trained Transformer-4 (GPT-4) in 
standardizing radiology reports</article-title><source>Eur Radiol</source><year>2024</year><month>06</month><volume>34</volume><issue>6</issue><fpage>3566</fpage><lpage>3574</lpage><pub-id pub-id-type="doi">10.1007/s00330-023-10384-x</pub-id><pub-id pub-id-type="medline">37938381</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nicolson</surname><given-names>A</given-names> </name><name name-style="western"><surname>Dowling</surname><given-names>J</given-names> </name><name name-style="western"><surname>Koopman</surname><given-names>B</given-names> </name></person-group><article-title>Improving chest x-ray report generation by leveraging warm starting</article-title><source>Artif Intell Med</source><year>2023</year><month>10</month><volume>144</volume><fpage>102633</fpage><pub-id pub-id-type="doi">10.1016/j.artmed.2023.102633</pub-id><pub-id pub-id-type="medline">37783533</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Perea-Trigo</surname><given-names>M</given-names> </name><name name-style="western"><surname>Botella-L&#x00F3;pez</surname><given-names>C</given-names> </name><name name-style="western"><surname>Mart&#x00ED;nez-Del-Amor</surname><given-names>M&#x00C1;</given-names> </name><name name-style="western"><surname>&#x00C1;lvarez-Garc&#x00ED;a</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Soria-Morillo</surname><given-names>LM</given-names> </name><name name-style="western"><surname>Vegas-Olmos</surname><given-names>JJ</given-names> </name></person-group><article-title>Synthetic corpus generation for deep learning-based translation of Spanish sign language</article-title><source>Sensors 
(Basel)</source><year>2024</year><month>02</month><day>24</day><volume>24</volume><issue>5</issue><fpage>1472</fpage><pub-id pub-id-type="doi">10.3390/s24051472</pub-id><pub-id pub-id-type="medline">38475008</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Andalib</surname><given-names>S</given-names> </name><name name-style="western"><surname>Solomon</surname><given-names>SS</given-names> </name><name name-style="western"><surname>Picton</surname><given-names>BG</given-names> </name><name name-style="western"><surname>Spina</surname><given-names>AC</given-names> </name><name name-style="western"><surname>Scolaro</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Nelson</surname><given-names>AM</given-names> </name></person-group><article-title>Source characteristics influence AI-enabled orthopaedic text simplification: recommendations for the future</article-title><source>JB JS Open Access</source><year>2025</year><month>01</month><day>8</day><volume>10</volume><issue>1</issue><fpage>e24.00007</fpage><pub-id pub-id-type="doi">10.2106/JBJS.OA.24.00007</pub-id><pub-id pub-id-type="medline">39781102</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Spina</surname><given-names>AC</given-names> </name><name name-style="western"><surname>Fereydouni</surname><given-names>P</given-names> </name><name name-style="western"><surname>Tang</surname><given-names>JN</given-names> </name><name name-style="western"><surname>Andalib</surname><given-names>S</given-names> </name><name name-style="western"><surname>Picton</surname><given-names>BG</given-names> </name><name name-style="western"><surname>Fox</surname><given-names>AR</given-names> </name></person-group><article-title>Tailoring glaucoma 
education using large language models: addressing health disparities in patient comprehension</article-title><source>Medicine (Baltimore)</source><year>2025</year><month>01</month><day>10</day><volume>104</volume><issue>2</issue><fpage>e41059</fpage><pub-id pub-id-type="doi">10.1097/MD.0000000000041059</pub-id><pub-id pub-id-type="medline">39792725</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="web"><article-title>Overview - OpenAI API</article-title><source>OpenAI</source><access-date>2025-03-03</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://platform.openai.com">https://platform.openai.com</ext-link></comment></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Bird</surname><given-names>S</given-names> </name><name name-style="western"><surname>Klein</surname><given-names>E</given-names> </name><name name-style="western"><surname>Loper</surname><given-names>E</given-names> </name></person-group><source>Natural Language Processing with Python</source><year>2009</year><edition>1</edition><publisher-name>O&#x2019;Reilly Media Inc</publisher-name><pub-id pub-id-type="other">9780596516499</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fern&#x00E1;ndez-Huerta</surname><given-names>J</given-names> </name></person-group><article-title>Medidas sencillas de lecturabilidad [Article in Spanish]</article-title><source>Consigna</source><year>1959</year><volume>214</volume><fpage>29</fpage><lpage>32</lpage></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Barrio-Cantalejo</surname><given-names>IM</given-names> </name><name 
name-style="western"><surname>Sim&#x00F3;n-Lorda</surname><given-names>P</given-names> </name><name name-style="western"><surname>Melguizo</surname><given-names>M</given-names> </name><name name-style="western"><surname>Escalona</surname><given-names>I</given-names> </name><name name-style="western"><surname>Mariju&#x00E1;n</surname><given-names>MI</given-names> </name><name name-style="western"><surname>Hernando</surname><given-names>P</given-names> </name></person-group><article-title>Validaci&#x00F3;n de la Escala INFLESZ para evaluar la legibilidad de los textos dirigidos a pacientes [Article in Spanish]</article-title><source>Anales Sis San Navarra</source><year>2008</year><volume>31</volume><issue>2</issue><fpage>135</fpage><lpage>152</lpage><pub-id pub-id-type="doi">10.4321/S1137-66272008000300004</pub-id><pub-id pub-id-type="medline">18953362</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Taira</surname><given-names>BR</given-names> </name><name name-style="western"><surname>Kreger</surname><given-names>V</given-names> </name><name name-style="western"><surname>Orue</surname><given-names>A</given-names> </name><name name-style="western"><surname>Diamond</surname><given-names>LC</given-names> </name></person-group><article-title>A pragmatic assessment of Google Translate for emergency department instructions</article-title><source>J Gen Intern Med</source><year>2021</year><month>11</month><volume>36</volume><issue>11</issue><fpage>3361</fpage><lpage>3365</lpage><pub-id pub-id-type="doi">10.1007/s11606-021-06666-z</pub-id><pub-id pub-id-type="medline">33674922</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Patil</surname><given-names>S</given-names> </name><name 
name-style="western"><surname>Davies</surname><given-names>P</given-names> </name></person-group><article-title>Use of Google Translate in medical communication: evaluation of accuracy</article-title><source>BMJ</source><year>2014</year><month>12</month><day>15</day><volume>349</volume><fpage>g7392</fpage><pub-id pub-id-type="doi">10.1136/bmj.g7392</pub-id><pub-id pub-id-type="medline">25512386</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Example artificial intelligence&#x2013;translated patient education material (PEM) with original English and original Spanish PEMs.</p><media xlink:href="ai_v4i1e70222_app1.docx" xlink:title="DOCX File, 31 KB"/></supplementary-material></app-group></back></article>