<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR AI</journal-id>
      <journal-title>JMIR AI</journal-title>
      <issn pub-type="epub">2817-1705</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v3i1e42630</article-id>
      <article-id pub-id-type="pmid">38875551</article-id>
      <article-id pub-id-type="doi">10.2196/42630</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Online Health Search Via Multidimensional Information Quality Assessment Based on Deep Language Models: Algorithm Development and Validation</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Malin</surname>
            <given-names>Bradley</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Carvalho</surname>
            <given-names>Darlinton</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>He</surname>
            <given-names>Daqing</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Marchesin</surname>
            <given-names>Stefano</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>Boya</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Radiology and Medical Informatics</institution>
            <institution>University of Geneva</institution>
            <addr-line>9 Chemin des Mines</addr-line>
            <addr-line>Geneva, 1202</addr-line>
            <country>Switzerland</country>
            <phone>41 782331908</phone>
            <email>boya.zhang@unige.ch</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4439-8212</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Naderi</surname>
            <given-names>Nona</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1272-7640</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Mishra</surname>
            <given-names>Rahul</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7511-2718</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Teodoro</surname>
            <given-names>Douglas</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6238-4503</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Radiology and Medical Informatics</institution>
        <institution>University of Geneva</institution>
        <addr-line>Geneva</addr-line>
        <country>Switzerland</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Computer Science</institution>
        <institution>Université Paris-Saclay</institution>
        <institution>Centre national de la recherche scientifique, Laboratoire Interdisciplinaire des Sciences du Numérique</institution>
        <addr-line>Orsay</addr-line>
        <country>France</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Boya Zhang <email>boya.zhang@unige.ch</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2024</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>2</day>
        <month>5</month>
        <year>2024</year>
      </pub-date>
      <volume>3</volume>
      <elocation-id>e42630</elocation-id>
      <history>
        <date date-type="received">
          <day>12</day>
          <month>9</month>
          <year>2022</year>
        </date>
        <date date-type="rev-request">
          <day>10</day>
          <month>4</month>
          <year>2023</year>
        </date>
        <date date-type="rev-recd">
          <day>12</day>
          <month>7</month>
          <year>2023</year>
        </date>
        <date date-type="accepted">
          <day>15</day>
          <month>1</month>
          <year>2024</year>
        </date>
      </history>
      <copyright-statement>©Boya Zhang, Nona Naderi, Rahul Mishra, Douglas Teodoro. Originally published in JMIR AI (https://ai.jmir.org), 02.05.2024.</copyright-statement>
      <copyright-year>2024</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR AI, is properly cited. The complete bibliographic information, a link to the original publication on https://www.ai.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://ai.jmir.org/2024/1/e42630" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Widespread misinformation in web resources can lead to serious implications for individuals seeking health advice. Despite that, information retrieval models are often focused only on the query-document relevance dimension to rank results.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>We investigate a multidimensional information quality retrieval model based on deep learning to enhance the effectiveness of online health care information search results.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>In this study, we simulated online health information search scenarios with a topic set of 32 different health-related inquiries and a corpus containing 1 billion web documents from the April 2019 snapshot of Common Crawl. Using state-of-the-art pretrained language models, we assessed the quality of the retrieved documents according to their usefulness, supportiveness, and credibility dimensions for a given search query on 6030 human-annotated, query-document pairs. We evaluated this approach using transfer learning and more specific domain adaptation techniques.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>In the transfer learning setting, the usefulness model provided the largest distinction between help- and harm-compatible documents, with a difference of +5.6%, leading to a majority of helpful documents in the top 10 retrieved. The supportiveness model achieved the best harm compatibility (+2.4%), while the combination of usefulness, supportiveness, and credibility models achieved the largest distinction between help- and harm-compatibility on helpful topics (+16.9%). In the domain adaptation setting, the linear combination of different models showed robust performance, with help-harm compatibility above +4.4% for all dimensions and going as high as +6.8%.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>These results suggest that integrating automatic ranking models created for specific information quality dimensions can increase the effectiveness of health-related information retrieval. Thus, our approach could be used to enhance searches made by individuals seeking online health information.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>health misinformation</kwd>
        <kwd>information retrieval</kwd>
        <kwd>deep learning</kwd>
        <kwd>language model</kwd>
        <kwd>transfer learning</kwd>
        <kwd>infodemic</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>In today’s digital age, individuals with diverse information needs, medical knowledge, and linguistic skills [<xref ref-type="bibr" rid="ref1">1</xref>] turn to the web for health advice and to make treatment decisions [<xref ref-type="bibr" rid="ref2">2</xref>]. The mixture of facts and rumors in online resources [<xref ref-type="bibr" rid="ref3">3</xref>] makes it challenging for users to discern accurate content [<xref ref-type="bibr" rid="ref4">4</xref>]. To provide high-quality resources and enable properly informed decision-making [<xref ref-type="bibr" rid="ref5">5</xref>], information retrieval systems should differentiate between accurate and misinforming content [<xref ref-type="bibr" rid="ref6">6</xref>]. Nevertheless, search engines rank documents mainly by their relevance to the search query [<xref ref-type="bibr" rid="ref7">7</xref>], neglecting several health information quality concerns. Moreover, despite attempts by some search engines to combat misinformation [<xref ref-type="bibr" rid="ref8">8</xref>], they lack transparency in terms of the methodology used and performance evaluation.</p>
      <p><italic>Health misinformation</italic> is defined as health-related information that is inaccurate or misleading based on current scientific evidence [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>]. Due to the lack of health literacy for nonprofessionals [<xref ref-type="bibr" rid="ref11">11</xref>] and the rise of the infodemic phenomenon [<xref ref-type="bibr" rid="ref12">12</xref>]—the rapid spread of both accurate and inaccurate information about a medical topic on the internet [<xref ref-type="bibr" rid="ref13">13</xref>]—health misinformation has become increasingly prevalent online. Topics related to misinformation, such as “vaccine” or “the relationship between coronavirus and 5G” have gained scientific interest across social media platforms like Twitter and Instagram [<xref ref-type="bibr" rid="ref14">14</xref>-<xref ref-type="bibr" rid="ref16">16</xref>] and among various countries [<xref ref-type="bibr" rid="ref17">17</xref>]. Thus, the development of new credibility-centered search methods and assessment measures is crucial to address the pressing challenges in health-related information retrieval [<xref ref-type="bibr" rid="ref18">18</xref>].</p>
      <p>In recent years, numerous approaches have been introduced in the literature to categorize and assess misinformation according to multiple dimensions. Hesse et al [<xref ref-type="bibr" rid="ref19">19</xref>] proposed 7 dimensions of <italic>truthfulness</italic>, which include <italic>correctness, neutrality, comprehensibility, precision, completeness, speaker trustworthiness</italic>, and <italic>informativeness</italic>. On the other hand, van der Linden [<xref ref-type="bibr" rid="ref20">20</xref>] categorized an infodemic into 3 key dimensions: <italic>susceptibility</italic>, <italic>spread</italic>, and <italic>immunization</italic>. Information retrieval shared tasks, such as the Text Retrieval Conference (TREC) and the Conference and Labs of the Evaluation Forum (CLEF), have also started evaluating quality-based systems for health corpora using multiple dimensions [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref22">22</xref>]. The CLEF eHealth Lab Series proposed a benchmark to evaluate models according to the <italic>relevance</italic>, <italic>readability</italic>, and <italic>credibility</italic> of the retrieved information [<xref ref-type="bibr" rid="ref23">23</xref>]. The TREC Health Misinformation Track 2021 proposed further metrics of <italic>usefulness</italic>, <italic>supportiveness</italic>, and <italic>credibility</italic> [<xref ref-type="bibr" rid="ref24">24</xref>]. These dimensions also appear in the TREC Health Misinformation Track 2019 as <italic>relevancy</italic>, <italic>efficacy</italic>, and <italic>credibility</italic>, respectively. Additionally, models by Solainayagi and Ponnusamy [<xref ref-type="bibr" rid="ref25">25</xref>] and Li et al [<xref ref-type="bibr" rid="ref26">26</xref>] incorporated similar dimensions, emphasizing source <italic>reliability</italic> and the <italic>credibility</italic> of statements. 
These metrics represent some of the initial efforts to quantitatively assess the effectiveness of information retrieval engines in sourcing high-quality information, marking a shift from the traditional query-document relevance paradigm [<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref28">28</xref>]. Despite their variations, these information quality metrics focus on the following 3 main common topics: (1) <italic>relevancy</italic> (also called <italic>usefulness</italic> or <italic>informativeness</italic>) of the source to the search topic, (2) <italic>correctness</italic> (also called <italic>supportiveness</italic> or <italic>efficacy</italic>) of the information according to the search topic, and (3) <italic>credibility</italic> (also called <italic>trustworthiness</italic>) of the source.</p>
      <p>Thanks to these open shared tasks, several significant methodologies have been developed to improve the search for higher-quality health information. Although classical bag-of-words–based methods outperform neural network approaches in detecting health-related misinformation when training data are limited [<xref ref-type="bibr" rid="ref29">29</xref>], more advanced approaches are needed for web content. Specifically, research has proven the effectiveness of a hybrid approach that integrates classical handcrafted features with deep learning [<xref ref-type="bibr" rid="ref18">18</xref>]. Further to this, multistage ranking systems [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>], which couple the system with a label prediction model or use T5 [<xref ref-type="bibr" rid="ref32">32</xref>] to rerank Okapi Best Match 25 (BM25) results, have been proposed. Particularly, Lima et al [<xref ref-type="bibr" rid="ref30">30</xref>] considered the stance of the search query and engaged 2 assessors for an interactive search, integrating a continuous active learning method [<xref ref-type="bibr" rid="ref33">33</xref>]. This approach sets a baseline of human effort in separating helpful from harmful web content. Despite their success, these models often do not take into account the different information quality aspects in their design.</p>
      <p>In this study, we aimed to investigate the impact of multidimensional ranking on improving the quality of retrieved health-related information. Due to its coverage of the main information quality dimensions used in the scientific literature, we followed the empirical approach proposed in the TREC 2021 challenge, which considers <italic>usefulness</italic>, <italic>supportiveness</italic>, and <italic>credibility</italic> metrics, to propose a multidimensional ranking model. Using deep learning–based pretrained language models [<xref ref-type="bibr" rid="ref34">34</xref>] through transfer learning and domain adaptation approaches, we categorized the retrieved web resources according to different information quality dimensions. Specialized quality-oriented ranks obtained by reranking components were then fused [<xref ref-type="bibr" rid="ref32">32</xref>] to provide the final ranked list. In contrast to prior studies, our approach relied on the automatic detection of harmful (or inaccurate) claims and used a multidimensional information quality model to boost helpful resources.</p>
      <p>The main contributions of this work are 3-fold. We propose a multidimensional ranking model based on transfer learning and showed that it achieves state-of-the-art performance in automatic (ie, when the query stance is not provided) quality-centered ranking evaluations. We investigated our approach in 2 learning settings—transfer learning (ie, without query relevance judgments) and domain adaptation (ie, with query relevance judgments from a different corpus)—and demonstrated that they are capable of identifying more helpful documents than harmful ones, obtaining +5% and +7% help and harm compatibility scores, respectively. Last, we investigated how the combination of models specialized in different information dimensions impacts the quality of the results, and our analysis suggests that multidimensional aspects are crucial for extracting high-quality information, especially for unhelpful topics.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <p>In this section, we introduce our search model based on multidimensional information quality aspects. We first describe the evaluation benchmark. We then detail the implementation methodology and describe our evaluation experiments using transfer learning and domain adaptation strategies.</p>
      <sec>
        <title>TREC Health Misinformation Track 2021 Benchmark</title>
        <sec>
          <title>Benchmark Data Set</title>
          <p>To evaluate our approach, we used the TREC Health Misinformation Track 2021 benchmark [<xref ref-type="bibr" rid="ref35">35</xref>] organized by the National Institute of Standards and Technology (NIST) [<xref ref-type="bibr" rid="ref36">36</xref>]. The TREC Health Misinformation Track 2021 benchmark simulates web searches for specific health issues and interventions against a collection of English web documents [<xref ref-type="bibr" rid="ref37">37</xref>]. For each topic, the benchmark annotates the quality of the retrieved web documents using a pooling approach, in which the top retrieved documents by systems participating in the challenge are evaluated according to their usefulness, correctness, and credibility and subsequently labeled as helpful or harmful. In this context, helpful documents are defined as those supportive of helpful treatments or that try to dissuade the reader from using unhelpful treatments, while harmful documents encourage the use of unhelpful treatments or dissuade the reader from using helpful treatments [<xref ref-type="bibr" rid="ref24">24</xref>]. See Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> for more detail on the annotation.</p>
        </sec>
        <sec>
          <title>Health-Related Topics</title>
          <p>A topic in the TREC Health Misinformation Track 2021 benchmark consists of a health issue, an intervention, a query that connects the corresponding intervention to the health problem, and a description that resembles the web search question using natural language. NIST only provided assessments for 35 of the initial 50 topics. Among the assessed topics, 3 were further excluded due to the absence of harmful documents. Consequently, the benchmark consisted of 32 topics: 14 labeled as helpful and 18 labeled as unhelpful. For these queries, a total of 6030 query-document pairs were human-annotated according to different scales of usefulness, correctness, and credibility scores. A “helpful topic” refers to an intervention beneficial for treating a health issue, while an “unhelpful topic” indicates an ineffective intervention. The stance is supported by evidence from a credible source. <xref ref-type="table" rid="table1">Table 1</xref> presents examples of the queries and descriptions of helpful and unhelpful topics.</p>
          <table-wrap position="float" id="table1">
            <label>Table 1</label>
            <caption>
              <p>Examples of helpful and unhelpful topics with query and description.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="90"/>
              <col width="280"/>
              <col width="470"/>
              <col width="160"/>
              <thead>
                <tr valign="top">
                  <td>Number</td>
                  <td>Query</td>
                  <td>Description</td>
                  <td>Stance</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>106</td>
                  <td>vitamin b12 sun exposure vitiligo</td>
                  <td>Can vitamin b12 and sun exposure together help treat vitiligo?</td>
                  <td>Helpful</td>
                </tr>
                <tr valign="top">
                  <td>102</td>
                  <td>tepid sponge bath reduce fever children</td>
                  <td>Is a tepid sponge bath a good way to reduce fever in children?</td>
                  <td>Unhelpful</td>
                </tr>
              </tbody>
            </table>
          </table-wrap>
        </sec>
        <sec>
          <title>Web Corpus</title>
          <p>We used the Colossal Clean Crawled Corpus (C4), a collection of English-language web documents sourced from the public Common Crawl web scrape [<xref ref-type="bibr" rid="ref38">38</xref>]. The corpus comprises 1 billion English documents from the April 2019 snapshot. To illustrate the contradictory nature of the web information within the corpus, in <xref ref-type="table" rid="table2">Table 2</xref>, we present 2 documents relevant to topic 102: “tepid sponge bath reduce fever in children.” Although an article advises against the intervention (“Do Not Use Sponging to Reduce a Fever”), another article advises it could be a viable option (“Sponging is an option for high fevers”).</p>
          <table-wrap position="float" id="table2">
            <label>Table 2</label>
            <caption>
              <p>Examples of useful but contradictory documents for Topic 102: “Is a tepid sponge bath a good way to reduce fever in children?”.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="160"/>
              <col width="410"/>
              <col width="430"/>
              <thead>
                <tr valign="top">
                  <td>Article information</td>
                  <td>Article 1</td>
                  <td>Article 2</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>Doc ID</td>
                  <td>en.noclean.c4-train.07165-of-07168.96468</td>
                  <td>en.noclean.c4-train.00001-of-07168.126948</td>
                </tr>
                <tr valign="top">
                  <td>Time stamp</td>
                  <td>2019-04-25T18:00:17Z</td>
                  <td>2019-04-23T20:13:31Z</td>
                </tr>
                <tr valign="top">
                  <td>Text</td>
                  <td>[...] Do Not Use Sponging to Reduce a Fever. It is not recommended that you use sponging to reduce your child’s fever. There is no information that shows that sponging or tepid baths improve your child’s discomfort associated with a fever or an illness. Cool or cold water can cause shivering and increase your child’s temperature. Also, never add rubbing alcohol to the water. Rubbing alcohol can be absorbed into the skin or inhaled, causing serious problems such as a coma. [...]</td>
                  <td>[...] Sponging With Lukewarm Water: Note: Sponging is an option for high fevers, but not required. It is rarely needed. When to Use: Fever above 104° F (40° C) AND doesn’t come down with fever meds. Always give the fever medicine at least an hour to work before sponging. How to Sponge: Use lukewarm water (85 - 90° F) (29.4 - 32.2° C). Sponge for 20-30 minutes. If your child shivers or becomes cold, stop sponging. [...]</td>
                </tr>
                <tr valign="top">
                  <td>URL</td>
                  <td>https://patiented.solutions.aap.org/</td>
                  <td>https://childrensclinicofraceland.com/</td>
                </tr>
              </tbody>
            </table>
          </table-wrap>
        </sec>
      </sec>
      <sec>
        <title>Quality-Based Multidimensional Ranking Conceptual Model</title>
        <sec>
          <title>Phases</title>
          <p>The quality-based multidimensional ranking model proposed in this work is presented in <xref rid="figure1" ref-type="fig">Figure 1</xref>A. The information retrieval process can be divided into 2 phases: <italic>preprocessing</italic> and <italic>multidimensional ranking</italic>. In the preprocessing phase, for a given topic <italic>j</italic>, <italic>N<sub>D</sub></italic> documents were retrieved based on their relevance (eg, using a BM25 model) [<xref ref-type="bibr" rid="ref39">39</xref>]. In the multidimensional ranking phase, we further estimated the quality of the retrieved subset of documents according to the usefulness, supportiveness, and credibility dimensions. In the following sections, we describe the multidimensional ranking approach and its implementation using transfer learning and domain adaptation. We then describe the preprocessing step, which can be performed based on sparse or dense retrieval engines.</p>
          <fig id="figure1" position="float">
            <label>Figure 1</label>
            <caption>
              <p>Quality-based multidimensional ranking models: (A) general pipeline, (B) supportiveness model for the transfer learning approach. BERT: Bidirectional Encoder Representations from Transformers; C4: Colossal Clean Crawled Corpus; NIST: National Institute of Standards and Technology.</p>
            </caption>
            <graphic xlink:href="ai_v3i1e42630_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Multidimensional Ranking</title>
          <p>To provide higher-quality documents at the top ranks, we proposed using a set of machine learning models trained to classify documents according to the usefulness, supportiveness, and credibility dimensions. For the initial rank list obtained in the preprocessing phase (see details in the following sections), the documents were reranked in parallel according to the following strategies for usefulness, supportiveness, and credibility.</p>
          <sec>
            <title>Usefulness</title>
            <p>The usefulness dimension is defined as <italic>the extent to which the document contains information that a search user would find useful in answering the topic’s question</italic>. In this sense, it defines how pertinent a document is to a given topic. Thus, to compute the usefulness of retrieved documents, topic-document similarity models based on pretrained language models, such as Bidirectional Encoder Representations from Transformers (BERT)–base [<xref ref-type="bibr" rid="ref40">40</xref>], mono-BERT-large [<xref ref-type="bibr" rid="ref41">41</xref>], and ELECTRA [<xref ref-type="bibr" rid="ref42">42</xref>], could be used. Given a topic-document pair, the language model infers a score that gives the level of similarity between the 2 input text passages. Although bag-of-words models, such as BM25, provide a strong baseline for usefulness, they do not consider word relations by learning context-sensitive representations as is the case with the pretrained language models, which are used to enhance the quality of the original ranking [<xref ref-type="bibr" rid="ref28">28</xref>].</p>
          </sec>
          <sec>
            <title>Supportiveness</title>
            <p>The supportiveness dimension defines whether <italic>the document supports or dissuades the use of the treatment in the topic’s question</italic>. Therefore, it defines the stance of the document on the health topic. In this dimension, documents are identified under 3 levels: (1) supportive (ie, the document supports the treatment), (2) dissuasive (ie, the document refutes the treatment), and (3) neutral (ie, the document does not contain enough information to make the decision) [<xref ref-type="bibr" rid="ref35">35</xref>]. To compute the supportiveness of a document to a given query, the system should be optimized so that documents that are either supportive, if the topic is helpful, or dissuasive, if the topic is unhelpful, are boosted to the top of the ranking list, which means that correct documents are boosted and misinforming documents are downgraded.</p>
          </sec>
          <sec>
            <title>Credibility</title>
            <p>The credibility dimension defines <italic>whether the document is considered credible by the assessor</italic>, that is, how trustworthy the source document is. To compute this dimension, the content of the document itself could be used (eg, leveraging language features, such as readability [<xref ref-type="bibr" rid="ref43">43</xref>]), which is assessable using the Simple Measure of Gobbledygook index [<xref ref-type="bibr" rid="ref44">44</xref>]. Moreover, document metadata could be also used, such as incoming and outgoing links, which can be calculated with link analysis algorithms [<xref ref-type="bibr" rid="ref45">45</xref>], and URL addresses considered to be trusted sources [<xref ref-type="bibr" rid="ref46">46</xref>].</p>
          </sec>
        </sec>
        <sec>
          <title>Transfer Learning Implementation</title>
          <p>To implement the multidimensional ranking model in scenarios in which relevance judgments are not available, we proposed multiple (pretrained) models for each of the quality dimensions using transfer learning.</p>
          <sec>
            <title>Usefulness</title>
            <p>In this reranking step, we created an ensemble of pretrained language models—BERT-base, mono-BERT-large, and ELECTRA—all fine-tuned in the MS MARCO [<xref ref-type="bibr" rid="ref47">47</xref>] data set. Each model then predicted the similarity between the topic and the initial list of retrieved documents. Their results were finally combined using reciprocal rank fusion (RRF) [<xref ref-type="bibr" rid="ref32">32</xref>].</p>
          </sec>
          <sec>
            <title>Supportiveness</title>
            <p>In this reranking step (<xref rid="figure1" ref-type="fig">Figure 1</xref>B), we created an ensemble of claim-checking models—robustly optimized BERT approach (RoBERTa)–Large [<xref ref-type="bibr" rid="ref48">48</xref>], BioMedRoBERTa-base [<xref ref-type="bibr" rid="ref49">49</xref>], and SciBERT-base [<xref ref-type="bibr" rid="ref50">50</xref>]—which were fine-tuned on the FEVER [<xref ref-type="bibr" rid="ref51">51</xref>] and SciFact [<xref ref-type="bibr" rid="ref52">52</xref>] data sets. Claim-checking models take a claim and a document as the information source and validate the veracity of the claim based on the document content [<xref ref-type="bibr" rid="ref53">53</xref>]. Most claim-checking models assume that document content is ground truth. Since this is not valid in the case of web documents, we added a further classification step that evaluates the correctness of the retrieved documents. We used the top-<italic>k</italic> assignments [<xref ref-type="bibr" rid="ref44">44</xref>] provided by the claim-checking models to define whether the topic should be supported or refuted. The underlying assumption is that a scientific fact is defined by the largest number of evidence available for a topic. A higher rank is then given to the correct supportive or dissuasive documents, a medium rank is given to the neutral documents, and a lower rank is given to the incorrect supportive or dissuasive documents. The rank lists obtained for each model were then combined using RRF.</p>
          </sec>
          <sec>
            <title>Credibility</title>
            <p>In this step, we implemented a random forest classifier trained on the Microsoft Credibility data set [<xref ref-type="bibr" rid="ref54">54</xref>] with a set of credibility-related features, such as readability, Open PageRank [<xref ref-type="bibr" rid="ref45">45</xref>], and the number of cascading style sheets (CSS). The data set manually rated 1000 web pages with credibility scores between 1 (“very noncredible”) and 5 (“very credible”). We converted these scores for a binary classification setting—that is, scores of 4 and 5 were considered as 1 or <italic>credible</italic>, and scores of 1, 2, and 3 were considered as 0 or <italic>noncredible</italic>. For the readability score, we relied on the Simple Measure of Gobbledygook index [<xref ref-type="bibr" rid="ref44">44</xref>], which estimates the years of education an average person needs to understand a piece of writing. Following Schwarz and Morris [<xref ref-type="bibr" rid="ref54">54</xref>], we retrieved a web page’s PageRank and used it as a feature to train the classifier. We further used the number of CSS style definitions to estimate the effort for the design of a web page [<xref ref-type="bibr" rid="ref55">55</xref>]. Last, a list of credible websites scraped from the Health On the Net search engine [<xref ref-type="bibr" rid="ref46">46</xref>] for the evaluated topics was combined with the baseline model to explore better performance. The result of the classifier was added to the unitary value of the Health On the Net credible sites [<xref ref-type="bibr" rid="ref46">46</xref>].</p>
          </sec>
        </sec>
        <sec>
          <title>Domain Adaptation Implementation</title>
          <p>To implement the multidimensional ranking model in scenarios in which relevance judgments are available, we compared different pretrained language models—BERT, BioBERT [<xref ref-type="bibr" rid="ref56">56</xref>], and BigBird [<xref ref-type="bibr" rid="ref57">57</xref>]—for each of the quality dimensions using domain adaptation. In this case, each model was fine-tuned to predict the relevance judgment of a specific dimension (ie, usefulness, supportiveness, and credibility). Although the input size was limited to 512 tokens for the first 2 models, BigBird allows up to 4096 tokens.</p>
          <p>We used the TREC 2019 Decision Track [<xref ref-type="bibr" rid="ref33">33</xref>] benchmark data set to fine-tune our specific quality dimension models. The TREC 2019 Decision Track benchmark data set contains 51 topics evaluated across 3 dimensions: relevance, effectiveness, and credibility. Adhering to the experimental design set by [<xref ref-type="bibr" rid="ref58">58</xref>], we mapped the 2019 and 2021 benchmarks as follows. The relevance dimension (2019) was mapped to usefulness (2021), with highly relevant documents translated as very useful and relevant documents as useful. The effectiveness dimension (2019) was mapped to supportiveness (2021), with effective labels reinterpreted as supportive and ineffective as dissuasive. The credibility dimension (2019) was directly mapped to credibility (2021) using the same labels.</p>
          <p>The 2019 track uses the ClueWeb12-B13 [<xref ref-type="bibr" rid="ref59">59</xref>,<xref ref-type="bibr" rid="ref60">60</xref>] corpus, which contains 50 million pages. More details on the TREC 2019 Decision Track [<xref ref-type="bibr" rid="ref33">33</xref>] benchmark are provided in Table S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
          <p>In the training phase, the language models received as input were the pair topic-document and a label for each dimension according to the 2019-2021 mapping strategy. At the inference time, given a topic-document pair from the TREC Health Misinformation Track 2021 benchmark, the model would infer its usefulness, supportiveness, or credibility based on the dimension on which it was trained.</p>
        </sec>
        <sec>
          <title>Preprocessing or Ranking Phase</title>
          <p>In the preprocessing step, which is initially executed to select a short list of candidate documents for the input query, a BM25 model was used. This step was performed using a bag-of-words model due to its efficiency. For the C4 snapshot collection, 2 indices were created, one using standard BM25 parameters and another fine-tuned using a collection of topics automatically generated (silver standard) from a set of 4985 indexed documents. For a given document, the silver topic was created based on the keyword2query [<xref ref-type="bibr" rid="ref61">61</xref>] and doc2query [<xref ref-type="bibr" rid="ref41">41</xref>] models to provide the query and description content, respectively. Using the silver topics and their respective documents, the BM25 parameters of the second index were then fine-tuned using grid search in a known-item search approach [<xref ref-type="bibr" rid="ref62">62</xref>] (ie, for a given silver topic, the model should return in the top-1 the respective document used to generate it). The results of these 2 indices were fused using RRF.</p>
        </sec>
      </sec>
      <sec>
        <title>Evaluation Metric</title>
        <p>We followed the official TREC evaluation strategy and used the compatibility metric [<xref ref-type="bibr" rid="ref46">46</xref>] to assess the performance of our models. Contrary to the classic information retrieval tasks, in which the performance metric relies on the degree of relatedness between queries and documents, in quality retrieval, harmful documents should be penalized, especially if they are relevant to the query content. In this context, the compatibility metric calculates the similarity between the actual ranking <italic>R</italic> provided by a model and an ideal ranking <italic>I</italic> as provided by the query relevance annotations. According to Equation 1, the compatibility is calculated with the rank-biased overlap (RBO) [<xref ref-type="bibr" rid="ref63">63</xref>] similarity metric, which is top-weighted, with greater weight placed at higher ranks to address the indeterminate and incomplete nature of web search results [<xref ref-type="bibr" rid="ref64">64</xref>]:</p>
        <disp-formula>
          <graphic xlink:href="ai_v3i1e42630_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>where the parameter <italic>p</italic> represents the searcher’s patience or persistence and is set to 0.95 in our experiments and <italic>K</italic> is the search depth and is set to 1000 to bring <italic>p</italic><sup><italic>K</italic>-1</sup> as close to 0 as possible. As shown in Equation 2, an additional normalization step was added to accommodate short, truncated ideal results, so when there are fewer documents in the ideal ranking than in the actual ranking list, it does not influence the compatibility computation results:</p>
        <disp-formula>
          <graphic xlink:href="ai_v3i1e42630_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>To ensure that helpful and harmful documents are treated differently, even if both might be relevant to the query content, the assessments were divided into “help compatibility” (help) and “harm compatibility” (harm) metrics. To evaluate the ability of the system to separate helpful from harmful information, the “harm compatibility” results were then subtracted from the “help compatibility” results, which were marked as “help-harm compatibility” (help-harm). Overall, the more a ranking is compatible with the ideal helpful ranking, the better it is. Conversely, the more a ranking is compatible with the ideal harmful ranking, the worse it is.</p>
      </sec>
      <sec>
        <title>Experimental Setup</title>
        <p>The BM25 indices were created using the Elasticsearch framework (version 8.6.0). The number of documents <italic>N<sub>D</sub></italic> retrieved per topic in the preprocessing step was set to 10,000 in our experiments. The pretrained language models were based on open-source checkpoints from the HuggingFace platform [<xref ref-type="bibr" rid="ref65">65</xref>] and were implemented using the open-source PyTorch framework. The language models used for the usefulness dimension and their respective HuggingFace implementations were BERT base (Capreolus/bert-base-msmarco), BERT large (castorini/monobert-large-msmarco-finetune-only), and ELECTRA (Capreolus/electra-base-msmarco). The language models used for the supportiveness dimension were RoBERTa base (allenai/biomed_roberta_base), RoBERTa large (roberta-large), and SciBERT (allenai/scibert_scivocab_uncased). For the credibility dimension, we used the random forest algorithm of the scikit-learn library. In the domain adaptation setup, we partitioned the 2019 labeled data set into training and validation sets using an 80%:20% split ratio; the latter was used to select the best models. We then fine-tuned BioBERT (dmis-lab/biobert-base-cased-v1.1) with a batch size of 16, learning rate of 1 × 10<sup>-5</sup>, and 20 epochs with early stopping set at 5 and utilizing the binary cross-entropy loss, which was optimized using the Adam optimizer. The BigBird model (google/bigbird-roberta-base) was fine-tuned with a batch size of 2, keeping all the other settings the same as the BioBERT model. All language models were fine-tuned using a single NVIDIA Tesla V100 graphics card with 32 GB of memory (see <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref> for more details). Results are reported using the compatibility and normalized discounted cumulative gain (nDCG) metrics. 
For reference, they were compared with the results of other participants of the official TREC Health Misinformation 2021 track, who submitted runs for the automatic evaluation (ie, without using information about the topic stance). The code repository is available at [<xref ref-type="bibr" rid="ref66">66</xref>].</p>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>No human participants were involved in this research. All data used to build and evaluate the deep language models were publicly available and open access.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Performance Results</title>
        <p>In <xref ref-type="table" rid="table3">Table 3</xref>, we present the performance results of our quality-based retrieval models using the TREC Health Misinformation 2021 benchmark. Helpful compatibility (help) considers only helpful documents of the relevant judgment, while harmful compatibility (harm) considers only harmful documents and help-harm considers their compatibility difference (see Table S1 in Multimedia Appendix 1 for further detail). Additionally, we show the nDCG scores calculated using helpful (help) documents or harmful (harm) documents of the relevant judgment. The helpful<italic><sub>T</sub></italic>, unhelpful<italic><sub>T</sub></italic>, and all<italic><sub>T</sub></italic> terms denote helpful topics, unhelpful topics, and all topics, respectively. <italic>H<sub>U</sub></italic>, <italic>H<sub>S</sub></italic>, and <italic>H<sub>C</sub></italic> rankings represent the combination of the preprocessing (<italic>H<sub>P</sub></italic>) results with the reranking results for usefulness (<italic>H<sub>U</sub>’</italic>), supportiveness (<italic>H<sub>S</sub>’</italic>), and credibility (<italic>H<sub>C</sub>’</italic>), respectively. For reference, we show our results compared with the models participating in the TREC Health Misinformation Track 2021: Pradeep et al [<xref ref-type="bibr" rid="ref31">31</xref>] used the default BM25 ranker from Pyserini. Their reranking process incorporated a mix of mono and duo T5 models as well as Vera [<xref ref-type="bibr" rid="ref67">67</xref>] on different topic fields. Abualsaud et al [<xref ref-type="bibr" rid="ref68">68</xref>] created filtered collections that focus on filtering out nonmedical and unreliable documents, which were then used for retrieval with Anserini’s BM25. Schlicht et al [<xref ref-type="bibr" rid="ref69">69</xref>] also used Pyserini’s BM25 ranker and Bio Sentence BERT to estimate usefulness and RoBERTa for credibility. 
The final score was a fusion of these individual rankings. Fernández-Pichel et al [<xref ref-type="bibr" rid="ref70">70</xref>] used BM25 and RoBERTa for reranking and similarity assessment of the top 100 documents, trained an additional reliability classifier, and merged scores using CombSUM [<xref ref-type="bibr" rid="ref71">71</xref>] or Borda Count. Bondarenko et al [<xref ref-type="bibr" rid="ref72">72</xref>] used Anserini’s BM25 and PyGaggle’s MonoT5 for 2 baseline rankings, then reranked the top 20 from each using 3 argumentative axioms on seemingly argumentative queries.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Performance results for the quality-based retrieval models.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="180"/>
            <col width="0"/>
            <col width="90"/>
            <col width="0"/>
            <col width="130"/>
            <col width="0"/>
            <col width="0"/>
            <col width="100"/>
            <col width="0"/>
            <col width="110"/>
            <col width="0"/>
            <col width="120"/>
            <col width="0"/>
            <col width="150"/>
            <col width="0"/>
            <col width="90"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Model</td>
                <td colspan="5">nDCG<sup>a</sup></td>
                <td colspan="9">Compatibility</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <break/>
                </td>
                <td colspan="2">Help<sup>b</sup> ↑</td>
                <td colspan="2">Harm<sup>c</sup> ↓</td>
                <td colspan="3">Help ↑</td>
                <td colspan="2">Harm ↓</td>
                <td colspan="5">Help-harm ↑</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <break/>
                </td>
                <td colspan="2">all<sub><italic>T</italic></sub><sup>d</sup></td>
                <td colspan="2">all<sub><italic>T</italic></sub></td>
                <td colspan="3">all<sub><italic>T</italic></sub></td>
                <td colspan="2">all<sub><italic>T</italic></sub></td>
                <td colspan="2">helpful<sub><italic>T</italic></sub><sup>e</sup></td>
                <td colspan="2">unhelpful<sub><italic>T</italic></sub><sup>f</sup></td>
                <td>all<sub><italic>T</italic></sub></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="3">BM25<sup>g</sup> [<xref ref-type="bibr" rid="ref39">39</xref>]</td>
                <td colspan="2">0.516</td>
                <td colspan="2">0.360</td>
                <td colspan="3">0.122</td>
                <td colspan="2">0.144</td>
                <td colspan="2">0.158</td>
                <td colspan="2">–0.162</td>
                <td>–0.022</td>
              </tr>
              <tr valign="top">
                <td colspan="3">Pradeep et al [<xref ref-type="bibr" rid="ref31">31</xref>]</td>
                <td colspan="2">0.602</td>
                <td colspan="2">0.378</td>
                <td colspan="3">0.195<sup>h</sup></td>
                <td colspan="2">0.153</td>
                <td colspan="2">0.234<sup>h</sup></td>
                <td colspan="2">–0.106</td>
                <td>0.043</td>
              </tr>
              <tr valign="top">
                <td colspan="3">Abualsaud et al [<xref ref-type="bibr" rid="ref68">68</xref>]</td>
                <td colspan="2">0.302</td>
                <td colspan="2">0.185<sup>h</sup></td>
                <td colspan="3">0.164</td>
                <td colspan="2">0.123</td>
                <td colspan="2">0.179</td>
                <td colspan="2">–0.067</td>
                <td>0.040</td>
              </tr>
              <tr valign="top">
                <td colspan="3">Schlicht et al [<xref ref-type="bibr" rid="ref69">69</xref>]</td>
                <td colspan="2">0.438</td>
                <td colspan="2">0.309</td>
                <td colspan="3">0.121</td>
                <td colspan="2">0.103</td>
                <td colspan="2">0.157</td>
                <td colspan="2">–0.089</td>
                <td>0.018</td>
              </tr>
              <tr valign="top">
                <td colspan="3">Fernández-Pichel et al [<xref ref-type="bibr" rid="ref70">70</xref>]</td>
                <td colspan="2">0.603<sup>h</sup></td>
                <td colspan="2">0.363</td>
                <td colspan="3">0.163</td>
                <td colspan="2">0.155</td>
                <td colspan="2">0.163</td>
                <td colspan="2">–0.113</td>
                <td>0.008</td>
              </tr>
              <tr valign="top">
                <td colspan="3">Bondarenko et al [<xref ref-type="bibr" rid="ref72">72</xref>]</td>
                <td colspan="2">0.266</td>
                <td colspan="2">0.226</td>
                <td colspan="3">0.129</td>
                <td colspan="2">0.144</td>
                <td colspan="2">0.150</td>
                <td colspan="2">–0.144</td>
                <td>–0.015</td>
              </tr>
              <tr valign="top">
                <td colspan="17">
                  <bold>Transfer learning</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <italic>H<sub>U</sub><sup>i</sup></italic>
                </td>
                <td colspan="2">0.538<sup>j</sup></td>
                <td colspan="2">0.324</td>
                <td colspan="3">0.142<sup>j</sup></td>
                <td colspan="2">0.087<sup>h</sup></td>
                <td colspan="2">0.156</td>
                <td colspan="2">–0.022<sup>h</sup></td>
                <td colspan="2">0.056<sup>h</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td><italic>H</italic><sub><italic>U</italic></sub> + <italic>H</italic><sub><italic>S</italic></sub><sup>k</sup></td>
                <td colspan="2">0.477</td>
                <td colspan="2">0.315<sup>j</sup></td>
                <td colspan="3">0.130</td>
                <td colspan="2">0.092</td>
                <td colspan="2">0.151</td>
                <td colspan="2">–0.049</td>
                <td colspan="2">0.038</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td><italic>H</italic><sub><italic>U</italic></sub> + <italic>H</italic><sub><italic>S</italic></sub> + <italic>H</italic><sub><italic>C</italic></sub><sup>l</sup></td>
                <td colspan="2">0.484</td>
                <td colspan="2">0.320</td>
                <td colspan="3">0.137</td>
                <td colspan="2">0.095</td>
                <td colspan="2">0.169<sup>j</sup></td>
                <td colspan="2">–0.057</td>
                <td colspan="2">0.042</td>
              </tr>
              <tr valign="top">
                <td colspan="17">
                  <bold>Domain adaptation</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <italic>H</italic>
                  <sub>
                    <italic>U</italic>
                  </sub>
                </td>
                <td colspan="2">0.510</td>
                <td colspan="2">0.327</td>
                <td colspan="3">0.128</td>
                <td colspan="2">0.100</td>
                <td colspan="2">0.146</td>
                <td colspan="2">–0.063</td>
                <td colspan="2">0.029</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td><italic>H</italic><sub><italic>U</italic></sub> + <italic>H</italic><sub><italic>S</italic></sub></td>
                <td colspan="2">0.482</td>
                <td colspan="2">0.319</td>
                <td colspan="3">0.108</td>
                <td colspan="2">0.089</td>
                <td colspan="2">0.108</td>
                <td colspan="2">–0.050</td>
                <td colspan="2">0.019</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td><italic>H</italic><sub><italic>U</italic></sub> + <italic>H</italic><sub><italic>S</italic></sub> + <italic>H</italic><sub><italic>C</italic></sub><sup>l</sup></td>
                <td colspan="2">0.502</td>
                <td colspan="2">0.325</td>
                <td colspan="3">0.131</td>
                <td colspan="2">0.094</td>
                <td colspan="2">0.147</td>
                <td colspan="2">–0.048</td>
                <td colspan="2">0.037</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>nDCG: normalized discounted cumulative gain.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>Help: results considering only helpful documents in the relevance judgment.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>Harm: results considering only harmful documents in the relevance judgment.</p>
            </fn>
            <fn id="table3fn4">
              <p><sup>d</sup>all<italic><sub>T</sub></italic>: all topics.</p>
            </fn>
            <fn id="table3fn5">
              <p><sup>e</sup>helpful<italic><sub>T</sub></italic>: helpful topics.</p>
            </fn>
            <fn id="table3fn6">
              <p><sup>f</sup>unhelpful<italic><sub>T</sub></italic>: unhelpful topics.</p>
            </fn>
            <fn id="table3fn7">
              <p><sup>g</sup>BM25: Best Match 25.</p>
            </fn>
            <fn id="table3fn8">
              <p><sup>h</sup>Best performance.</p>
            </fn>
            <fn id="table3fn9">
              <p><sup>i</sup><italic>H<sub>U</sub></italic>: usefulness model.</p>
            </fn>
            <fn id="table3fn10">
              <p><sup>j</sup>Best performance among our models.</p>
            </fn>
            <fn id="table3fn11">
              <p><sup>k</sup><italic>H<sub>S</sub></italic>: supportiveness model.</p>
            </fn>
            <fn id="table3fn12">
              <p><sup>l</sup><italic>H<sub>C</sub></italic>: credibility model.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>Our approach provides state-of-the-art results for automatic ranking systems in the transfer learning setting, with help-harm compatibility of +5.6%. This result was obtained with the usefulness model (<italic>H<sub>U</sub></italic>), which is the combination of preprocessing and usefulness reranking. It outperformed the default BM25 model [<xref ref-type="bibr" rid="ref39">39</xref>] by 7% (<italic>P</italic>=.04) and the best automatic model from the TREC 2021 benchmark (Pradeep et al [<xref ref-type="bibr" rid="ref31">31</xref>]) by 1%. In this case, although the help and harm compatibility metrics individually exhibited statistical significance (<italic>P</italic>=.02 and <italic>P</italic>=.01, respectively), the improvement in help-harm compatibility compared with the best automatic model was not statistically significant (<italic>P</italic>=.70). The usefulness model also stood out by achieving the best help and harm compatibility metrics among our models (14.2% and 8.7%, respectively; <italic>P</italic>=.50). Notice that, for the latter metric, the closest to 0, the better the performance. Interestingly, the usefulness model attained the highest nDCG score on help for all topics as well (<italic>P</italic>=.03). The combination of usefulness, supportiveness, and credibility models (<italic>H<sub>U</sub></italic> + <italic>H<sub>S</sub></italic> + <italic>H<sub>C</sub></italic>) provided the best help-harm (+16.9%) for helpful topics among our models (<italic>H<sub>U</sub></italic>: <italic>P</italic>=.40; <italic>H<sub>U</sub></italic> + <italic>H<sub>S</sub></italic>: <italic>P</italic>=.04).</p>
        <p>Meanwhile, when calculating nDCG scores on harm, the combination of usefulness and supportiveness model (<italic>H<sub>U</sub></italic> + <italic>H<sub>S</sub></italic>) in the transfer learning and domain adaptation settings outperformed the other model combinations (<italic>P</italic>=.50), indicating a different perspective of the best-performing model. Last, contrary to what would be expected, in the domain adaptation setting, the performance was poorer than the simpler transfer learning approach (2% decrease on average for the compatibility metric; <italic>P</italic>=.02). See Table S4 in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref> for more information about using nDCG as a metric in a multidimensional evaluation.</p>
      </sec>
      <sec>
        <title>Performance Stratification by Quality Dimension</title>
        <p>In <xref ref-type="table" rid="table4">Table 4</xref>, we show the help, harm, and help-harm compatibility scores for the individual quality-based reranking models, which disregarded the preprocessing step (prime index). Additionally, we provide the nDCG scores for a more comprehensive view of the models’ performance. <italic>H<sub>P</sub></italic> represents the preprocessing, and <italic>H<sub>U</sub>’</italic>, <italic>H<sub>S</sub>’</italic>, and <italic>H<sub>C</sub>’</italic> stand for rerankings for usefulness, supportiveness, and credibility, respectively.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Performance results for the individual ranking models.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="170"/>
            <col width="0"/>
            <col width="140"/>
            <col width="0"/>
            <col width="100"/>
            <col width="0"/>
            <col width="0"/>
            <col width="100"/>
            <col width="0"/>
            <col width="110"/>
            <col width="0"/>
            <col width="120"/>
            <col width="0"/>
            <col width="140"/>
            <col width="0"/>
            <col width="90"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Setting and model</td>
                <td colspan="5">nDCG<sup>a</sup></td>
                <td colspan="9">Compatibility</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <break/>
                </td>
                <td colspan="2">Help<sup>b</sup> ↑</td>
                <td colspan="2">Harm<sup>c</sup> ↓</td>
                <td colspan="3">Help ↑</td>
                <td colspan="2">Harm ↓</td>
                <td colspan="5">Help-harm ↑</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <break/>
                </td>
                <td colspan="2">all<sub><italic>T</italic></sub><sup>d</sup></td>
                <td colspan="2">all<sub><italic>T</italic></sub></td>
                <td colspan="3">all<sub><italic>T</italic></sub></td>
                <td colspan="2">all<sub><italic>T</italic></sub></td>
                <td colspan="2">helpful<sub><italic>T</italic></sub><sup>e</sup></td>
                <td colspan="2">unhelpful<sub><italic>T</italic></sub><sup>f</sup></td>
                <td>all<sub><italic>T</italic></sub></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="3">
                  <italic>H<sub>P</sub><sup>g</sup></italic>
                </td>
                <td colspan="2">0.538<sup>h</sup></td>
                <td colspan="2">0.341</td>
                <td colspan="3">0.126<sup>h</sup></td>
                <td colspan="2">0.111</td>
                <td colspan="2">0.127<sup>h</sup></td>
                <td colspan="2">–0.072</td>
                <td>0.015</td>
              </tr>
              <tr valign="top">
                <td colspan="17">
                  <bold>Transfer learning</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <italic>H<sub>U</sub>’<sup>i,j</sup></italic>
                </td>
                <td colspan="2">0.438</td>
                <td colspan="2">0.264</td>
                <td colspan="3">0.115</td>
                <td colspan="2">0.080</td>
                <td colspan="2">0.106</td>
                <td colspan="2">–0.020</td>
                <td colspan="2">0.036</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <italic>H<sub>S</sub>’<sup>j,k</sup></italic>
                </td>
                <td colspan="2">0.140</td>
                <td colspan="2">0.102<sup>h</sup></td>
                <td colspan="3">0.026</td>
                <td colspan="2">0.024</td>
                <td colspan="2">0.021</td>
                <td colspan="2">–0.013</td>
                <td colspan="2">0.002</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <italic>H<sub>C</sub>’<sup>j,l</sup></italic>
                </td>
                <td colspan="2">0.131</td>
                <td colspan="2">0.113</td>
                <td colspan="3">0.031</td>
                <td colspan="2">0.035</td>
                <td colspan="2">0.033</td>
                <td colspan="2">–0.032</td>
                <td colspan="2">–0.003</td>
              </tr>
              <tr valign="top">
                <td colspan="17">
                  <bold>Domain adaptation</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <italic>H<sub>U</sub>’</italic>
                </td>
                <td colspan="2">0.436</td>
                <td colspan="2">0.277</td>
                <td colspan="3">0.077</td>
                <td colspan="2">0.038</td>
                <td colspan="2">0.099</td>
                <td colspan="2">–0.008</td>
                <td colspan="2">0.039<sup>h</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <italic>H<sub>S</sub>’</italic>
                </td>
                <td colspan="2">0.368</td>
                <td colspan="2">0.251</td>
                <td colspan="3">0.030</td>
                <td colspan="2">0.015<sup>h</sup></td>
                <td colspan="2">0.030</td>
                <td colspan="2">0.003<sup>h</sup></td>
                <td colspan="2">0.014</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <italic>H<sub>C</sub>’</italic>
                </td>
                <td colspan="2">0.443</td>
                <td colspan="2">0.296</td>
                <td colspan="3">0.079</td>
                <td colspan="2">0.064</td>
                <td colspan="2">0.104</td>
                <td colspan="2">–0.055</td>
                <td colspan="2">0.014</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>nDCG: normalized discounted cumulative gain.</p>
            </fn>
            <fn id="table4fn2">
              <p><sup>b</sup>Help: results considering only helpful documents in the relevance judgment.</p>
            </fn>
            <fn id="table4fn3">
              <p><sup>c</sup>Harm: results considering only harmful documents in the relevance judgment.</p>
            </fn>
            <fn id="table4fn4">
              <p><sup>d</sup>all<italic><sub>T</sub></italic>: all topics.</p>
            </fn>
            <fn id="table4fn5">
              <p><sup>e</sup>helpful<italic><sub>T</sub></italic>: helpful topics.</p>
            </fn>
            <fn id="table4fn6">
              <p><sup>f</sup>unhelpful<italic><sub>T</sub></italic>: unhelpful topics.</p>
            </fn>
            <fn id="table4fn7">
              <p><sup>g</sup><italic>H<sub>p</sub></italic>: preprocess.</p>
            </fn>
            <fn id="table4fn8">
              <p><sup>h</sup>Best performance.</p>
            </fn>
            <fn id="table4fn9">
              <p><sup>i</sup><italic>H<sub>U</sub>’</italic>: usefulness model.</p>
            </fn>
            <fn id="table4fn10">
              <p><sup>j</sup>Unlike <italic>H<sub>U</sub></italic>, <italic>H<sub>S</sub></italic>, and <italic>H<sub>C</sub></italic>, <italic>H<sub>U</sub>’</italic>, <italic>H<sub>S</sub>’</italic>, and <italic>H<sub>C</sub>’</italic> rankings are not combined with <italic>H<sub>p</sub></italic>.</p>
            </fn>
            <fn id="table4fn11">
              <p><sup>k</sup><italic>H<sub>S</sub>’</italic>: supportiveness model.</p>
            </fn>
            <fn id="table4fn12">
              <p><sup>l</sup><italic>H<sub>C</sub>’</italic>: credibility model.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>In the transfer learning setting, the usefulness model (<italic>H<sub>U</sub>’</italic>) achieved the highest help-harm compatibility (+3.6%; <italic>P</italic>=.20). The preprocessing model gave the best help compatibility (+12.7%; <italic>H<sub>U</sub>’</italic>: <italic>P</italic>=.70; <italic>H<sub>S</sub>’</italic> and <italic>H<sub>C</sub>’</italic>: <italic>P</italic>&#60;.001). Additionally, the preprocessing model yielded the highest nDCG score for help (<italic>H<sub>U</sub>’</italic>: <italic>P</italic>=.10; <italic>H<sub>S</sub>’</italic> and <italic>H<sub>C</sub>’</italic>: <italic>P</italic>&#60;.001). On the other hand, the preprocessing model showed the highest harm compatibility (+11.1%; <italic>H<sub>U</sub>’</italic>: <italic>P</italic>=.33; <italic>H<sub>S</sub>’</italic> and <italic>H<sub>C</sub>’</italic>: <italic>P</italic>&#60;.01). The combination of the preprocessing and usefulness models (ie, <italic>H<sub>U</sub></italic>=+5.6%) improved the preprocessing model by 4.1% (from +1.5% to +5.6% on the help-harm compatibility; <italic>P</italic>=.06). For harm compatibility, the supportiveness model (<italic>H<sub>S</sub>’</italic>) achieved the best performance among the individual models (+2.4%; <italic>H<sub>p</sub></italic>: <italic>P</italic>&#60;.001; <italic>H<sub>U</sub>’</italic>: <italic>P</italic>=.03; <italic>H<sub>C</sub>’</italic>: <italic>P</italic>=.34).</p>
        <p>In the domain adaptation setting, the usefulness model (<italic>H<sub>U</sub>’</italic>) reached help-harm compatibility of +3.9%, similarly outperforming the other models (<italic>P</italic>=.32). The supportiveness model (<italic>H<sub>S</sub>’</italic>) achieved the best performance on harm compatibility (+1.5%; <italic>P</italic>=.07) and on help-harm compatibility for unhelpful topics (+0.3%; <italic>P</italic>=.50). Notice that +0.3% is the only positive help-harm compatibility for unhelpful topics throughout all the individual and combined models in both settings including the preprocessing step. Last, in the domain adaptation setting, the performance of individual models was better than the simpler transfer learning approach (1% increase on average for the compatibility metric; <italic>P</italic>=.19).</p>
      </sec>
      <sec>
        <title>Reranking of the Top-N Documents</title>
        <p>To further illustrate the effectiveness of the supportiveness and credibility dimensions, in <xref rid="figure2" ref-type="fig">Figure 2</xref>, we reranked only the top-n documents using the results of the usefulness model (<italic>H<sub>U</sub></italic>) as the basis. As we can see in <xref ref-type="table" rid="table4">Table 4</xref>, the overall effectiveness of the supportiveness (<italic>H<sub>S</sub>’</italic>) and credibility (<italic>H<sub>C</sub>’</italic>) models was considerably lower than that of the usefulness (<italic>H<sub>U</sub>’</italic>) model. The reason is that the relevance judgments were created using a hierarchical approach: Only useful documents were further considered for supportiveness and credibility evaluations. As we reranked the documents in supportiveness and credibility dimensions without taking this hierarchy into account, their results might not be optimal. For example, low-ranking documents (ie, not useful) could have high credibility and, during the reranking process, could be boosted to the top ranks. Thus, we applied the supportiveness (<italic>H<sub>S</sub>’</italic>) and credibility (<italic>H<sub>C</sub>’</italic>) models to the usefulness model (<italic>H<sub>U</sub></italic>) results to rerank the top 10, 20, 50, 100, and 1000 documents, obtaining 2 new rankings, which were combined using RRF.</p>
        <p>As the reranking depth increased from 10 to 1000, we observed a decrease in both help and harm compatibility. This suggests that both helpful and harmful documents were downgraded due to the inclusion of less useful but potentially supportive or credible documents. In the transfer learning setting, as the reranking depth increased, the help-harm compatibility decreased until the depth reached 100. Beyond this point, we observed a slight increase at the depth of 1000. In the domain adaptation setting, the help-harm compatibility increased above +6% when the reranking depth was between 20 and 50. This implies that, following the procedure of human annotation, by considering only the more useful documents, the supportiveness and credibility dimensions can help retrieve more helpful than harmful documents.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Compatibility performance for the top 10, 20, 50, 100, and 1000 reranking depths taking the results of usefulness as the basis.</p>
          </caption>
          <graphic xlink:href="ai_v3i1e42630_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Quality Control</title>
        <p>One of the advantages of the proposed multidimensional model is that we can optimize the results according to different quality metrics. In <xref rid="figure3" ref-type="fig">Figure 3</xref>, we show how the compatibility performance varies by changing the weight of the specific models (<italic>H<sub>P</sub></italic>, <italic>H<sub>U</sub>’</italic>, <italic>H<sub>S</sub>’</italic>, and <italic>H<sub>C</sub>’</italic>). We normalized the score of the individual models to the unit and combined them linearly using a weight for 1 model between 0 and 2 while fixing the weight for the other 3 models at 0.33. For example, to see the influence of <italic>H<sub>P</sub></italic> in the final performance, we fixed the weights of <italic>H<sub>U</sub>’</italic>, <italic>H<sub>S</sub>’</italic>, and <italic>H<sub>C</sub>’</italic> at 0.33 and varied the weight of <italic>H<sub>P</sub></italic> between 0 and 2. With weight 0, the reference model did not account for the final rank, while with weight 2, its impact was twice the sum of the other 3 models.</p>
        <p>In the transfer learning setting, when we increased the weight of preprocessing and usefulness models, the help-harm compatibility increased to the best performance (+4.1% and +5.6%) then decreased slightly. For the supportiveness and credibility dimensions, the help-harm compatibility began to decrease once the weight was added. These results imply that the compatibility decreases with the weight addition regardless of whether it is helpful compatibility, harmful compatibility, or the difference between the 2.</p>
        <p>In the domain adaptation setting, when we increased the weight of preprocessing, supportiveness, and credibility models individually, the help-harm compatibility increased then converged to +6.6%, +5.9%, and +4.8%, respectively. For the usefulness model, the help-harm compatibility decreased once the weight was added until it converged to +4.4%. It is worth noting that, by combining the rankings linearly, the help-harm compatibility obtained from the domain adaptation setting may exceed the results we obtained when performing ranking combination with RRF (+3.7%), as well as the state-of-the-art result (+5.6%) in the transfer learning setting. The highest help-harm compatibility scores for each weighting combination were +6.6%, +6.8%, +6.5%, and +5.9% when varying the weights of <italic>H<sub>P</sub></italic>, <italic>H<sub>U</sub>’</italic>, <italic>H<sub>S</sub>’</italic>, and <italic>H<sub>C</sub>’</italic>, respectively.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Compatibility in the transfer learning approach (A-C) and compatibility in the domain adaptation approach (D-F), all with weights added to specific models.</p>
          </caption>
          <graphic xlink:href="ai_v3i1e42630_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Model Interpretation</title>
        <p>To semantically explain the variation of help-harm compatibility, we set the search depth K to 10. The help, harm, and help-harm compatibility of the 3 models are shown in <xref ref-type="table" rid="table5">Table 5</xref>. The help-harm compatibility was 1 when only helpful documents were retrieved in the top 10. Conversely, the help-harm compatibility was –1 when only harmful documents were retrieved in the top 10. A variation of 10% in the help or harm compatibility corresponded roughly to 1 helpful document exceeding the number of harmful documents retrieved in the top 10. Overall, the results show that retrieving relevant documents for health-related queries is hard, as, on average, only 1.5 of 10 documents were relevant (helpful or harmful) to the topic. In addition, we interpreted that the 3 models retrieved, on average, twice the number of helpful documents as harmful documents. Particularly, <italic>H<sub>U</sub></italic> had, on average, around 1 more helpful than harmful document in the top 10, of the 1.5 relevant documents retrieved. We also present the same analysis results for the domain adaptation setting, which also implies that, when the rankings were combined with RRF, the transfer learning approach outperformed the domain adaptation approach. See more details about the average compatibility for all the topics as the search depth K varied in Figure S1 in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>.</p>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Help, harm, and help-harm compatibility with search depth set to 10 for the transfer learning setting and domain adaptation setting.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="470"/>
            <col width="140"/>
            <col width="150"/>
            <col width="210"/>
            <thead>
              <tr valign="bottom">
                <td colspan="2">Setting and model</td>
                <td>Help<sup>a</sup> ↑</td>
                <td>Harm<sup>b</sup> ↓</td>
                <td>Help-harm ↑</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="5">
                  <bold>Transfer learning</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <italic>H<sub>U</sub><sup>c</sup></italic>
                </td>
                <td>0.112<sup>d</sup></td>
                <td>0.047<sup>d</sup></td>
                <td>0.065<sup>d</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td><italic>H</italic><sub><italic>U</italic></sub> + <italic>H</italic><sub><italic>S</italic></sub><sup>e</sup></td>
                <td>0.088</td>
                <td>0.050</td>
                <td>0.038</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td><italic>H</italic><sub><italic>U</italic></sub> + <italic>H</italic><sub><italic>S</italic></sub> + <italic>H</italic><sub><italic>C</italic></sub><sup>f</sup></td>
                <td>0.099</td>
                <td>0.056</td>
                <td>0.044</td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>Domain adaptation</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <italic>H</italic>
                  <sub>
                    <italic>U</italic>
                  </sub>
                </td>
                <td>0.094</td>
                <td>0.060</td>
                <td>0.034</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td><italic>H</italic><sub><italic>U</italic></sub> + <italic>H</italic><sub><italic>S</italic></sub></td>
                <td>0.074</td>
                <td>0.070</td>
                <td>0.003</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td><italic>H</italic><sub><italic>U</italic></sub> + <italic>H</italic><sub><italic>S</italic></sub> + <italic>H</italic><sub><italic>C</italic></sub></td>
                <td>0.087</td>
                <td>0.076</td>
                <td>0.011</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>Help: results considering only helpful documents in the relevance judgment.</p>
            </fn>
            <fn id="table5fn2">
              <p><sup>b</sup>Harm: results considering only harmful documents in the relevance judgment.</p>
            </fn>
            <fn id="table5fn3">
              <p><sup>c</sup><italic>H<sub>U</sub></italic>: usefulness model.</p>
            </fn>
            <fn id="table5fn4">
              <p><sup>d</sup>Best performance.</p>
            </fn>
            <fn id="table5fn5">
              <p><sup>e</sup><italic>H<sub>S</sub></italic>: supportiveness model.</p>
            </fn>
            <fn id="table5fn6">
              <p><sup>f</sup><italic>H<sub>C</sub></italic>: credibility model.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <p>We propose a quality-based multidimensional ranking model to enhance the usefulness, supportiveness, and credibility of retrieved web resources for health-related queries. By adapting our approach in a transfer learning setting, we showed state-of-the-art results in the automatic quality ranking evaluation benchmark. We further explored the pipeline in a domain adaptation setting and showed that, in both settings, the proposed method can identify more helpful than harmful documents, as measured by +5% and +7% help-harm compatibility scores, respectively. By combining different reranking strategies, we showed that multidimensional aspects have a significant impact on retrieving high-quality information, particularly for unhelpful topics.</p>
      <p>The quality of web documents is biased in terms of topic stance. For all models, helpful topics achieve higher help compatibility, while unhelpful topics achieve higher harm compatibility. The implication is that web documents centered around helpful topics are more likely to support the intervention and are helpful. On the other hand, web documents focusing on unhelpful topics present an equal chance of being supportive or dissuasive on the intervention and are helpful or harmful. Among other consequences, if web data are used to train large language models without meticulously crafted training examples using effective data set search methods [<xref ref-type="bibr" rid="ref73">73</xref>], as the one proposed here, they are likely to further propagate health misinformation.</p>
      <p>Automatic retrieval systems tend to find more helpful information on helpful topics with the information biased toward helpfulness and find more harmful information on unhelpful topics with the information slightly biased toward harmfulness. The help-harm compatibility ranged from +2.3% to +15.3% for helpful topics and from –5.7% to +0.2% for unhelpful topics. The difference shows that, for the improvement of quality-centered retrieval models, it is especially important to focus on unhelpful topics. Moreover, although specialized models might provide enhanced effectiveness, their combination is not straightforward. In our experiments, we showed that supportiveness and credibility models should be applied only in the top 20 to 50 retrieved documents to achieve optimal performance.</p>
      <p>Finding the correct stance automatically is another key component of the automatic model. Automatic models show the ability to prioritize helpful documents, resulting in positive help-harm compatibility. However, they are still far from state-of-the-art manual models, with help-harm compatibility scores ranging from +20.8% [<xref ref-type="bibr" rid="ref68">68</xref>] to +25.9% [<xref ref-type="bibr" rid="ref31">31</xref>]. We acknowledge that the help-harm compatibility can improve significantly with the correct stance given. This information is nevertheless unavailable in standard search environments; thus, the scenario analyzed in this work is more adapted to real-world applications.</p>
      <p>This work has certain limitations. In the domain adaptation setting, we simplified the task to consider 2 classes within each dimension for the classification due to the limited variety available in the labeled data set. Alternatively, we could add other classes from documents that have been retrieved. Moreover, the number of topics used to evaluate our models was limited (n=32), despite including 6030 human-annotated query-document pairs, and thus reflects only a small portion of misinformation use cases.</p>
      <p>To conclude, the proliferation of health misinformation in web resources has led to mistrust and confusion among online health advice seekers. Automatic maintenance of factual discretion in web search results is the need of the hour. We propose a multidimensional information quality ranking model that utilizes usefulness, supportiveness, and credibility to strengthen the factual reliability of health advice search results. Experiments conducted on publicly available data sets show that the proposed model is promising, achieving state-of-the-art performance for automatic ranking in comparison with various baselines implemented on the TREC Health Misinformation 2021 benchmark. Thus, the proposed approach could be used to improve online health searches and provide quality-enhanced information for health information seekers. Future research could explore more granular classification models for each dimension, and a model simplification could provide an advantage for real-world implementations.</p>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Additional Information on Benchmark Datasets.</p>
        <media xlink:href="ai_v3i1e42630_app1.pdf" xlink:title="PDF File  (Adobe PDF File), 22 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Fine-Tuning in the Domain Adaptation Setting.</p>
        <media xlink:href="ai_v3i1e42630_app2.pdf" xlink:title="PDF File  (Adobe PDF File), 69 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Supporting Experiment Results.</p>
        <media xlink:href="ai_v3i1e42630_app3.pdf" xlink:title="PDF File  (Adobe PDF File), 195 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">BERT</term>
          <def>
            <p>Bidirectional Encoder Representations from Transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">BM25</term>
          <def>
            <p>Best Match 25</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">C4</term>
          <def>
            <p>Colossal Clean Crawled Corpus</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">CLEF</term>
          <def>
            <p>Conference and Labs of the Evaluation Forum</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">CSS</term>
          <def>
            <p>cascading style sheets</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">nDCG</term>
          <def>
            <p>normalized discounted cumulative gain</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">NIST</term>
          <def>
            <p>National Institute of Standards and Technology</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">RBO</term>
          <def>
            <p>rank-biased overlap</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">RoBERTa</term>
          <def>
            <p>robustly optimized BERT approach</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">RRF</term>
          <def>
            <p>reciprocal rank fusion</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">TREC</term>
          <def>
            <p>Text Retrieval Conference</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The study was funded by Innosuisse projects (funding numbers 55441.1 IP-ICT and 101.466 IP-ICT).</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The data sets generated during and/or analyzed during this study are available in the Text Retrieval Conference (TREC) Health Misinformation Track repository [<xref ref-type="bibr" rid="ref74">74</xref>] and GitLab repository [<xref ref-type="bibr" rid="ref66">66</xref>].</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>BZ, NN, and DT prepared the data, conceived and conducted the experiments, and analyzed the results. BZ, NN, and DT drafted the manuscript. All authors reviewed the manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Goeuriot</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>GJF</given-names>
            </name>
            <name name-style="western">
              <surname>Kelly</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Müller</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zobel</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Medical information retrieval: introduction to the special issue</article-title>
          <source>Inf Retrieval J</source>
          <year>2016</year>
          <month>1</month>
          <day>11</day>
          <volume>19</volume>
          <issue>1-2</issue>
          <fpage>1</fpage>
          <lpage>5</lpage>
          <pub-id pub-id-type="doi">10.1007/s10791-015-9277-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chu</surname>
              <given-names>JT</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>MP</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Viswanath</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Lam</surname>
              <given-names>TH</given-names>
            </name>
            <name name-style="western">
              <surname>Chan</surname>
              <given-names>SSC</given-names>
            </name>
          </person-group>
          <article-title>How, when and why people seek health information online: qualitative study in Hong Kong</article-title>
          <source>Interact J Med Res</source>
          <year>2017</year>
          <month>12</month>
          <day>12</day>
          <volume>6</volume>
          <issue>2</issue>
          <fpage>e24</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.i-jmr.org/2017/2/e24/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/ijmr.7000</pub-id>
          <pub-id pub-id-type="medline">29233802</pub-id>
          <pub-id pub-id-type="pii">v6i2e24</pub-id>
          <pub-id pub-id-type="pmcid">PMC5743920</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>MP</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>SZ</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>JYH</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Associations between COVID-19 misinformation exposure and belief with COVID-19 knowledge and preventive behaviors: cross-sectional online study</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>11</month>
          <day>13</day>
          <volume>22</volume>
          <issue>11</issue>
          <fpage>e22205</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/11/e22205/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/22205</pub-id>
          <pub-id pub-id-type="medline">33048825</pub-id>
          <pub-id pub-id-type="pii">v22i11e22205</pub-id>
          <pub-id pub-id-type="pmcid">PMC7669362</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ecker</surname>
              <given-names>UKH</given-names>
            </name>
            <name name-style="western">
              <surname>Lewandowsky</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cook</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Schmid</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Fazio</surname>
              <given-names>LK</given-names>
            </name>
            <name name-style="western">
              <surname>Brashier</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Kendeou</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Vraga</surname>
              <given-names>EK</given-names>
            </name>
            <name name-style="western">
              <surname>Amazeen</surname>
              <given-names>MA</given-names>
            </name>
          </person-group>
          <article-title>The psychological drivers of misinformation belief and its resistance to correction</article-title>
          <source>Nat Rev Psychol</source>
          <year>2022</year>
          <month>01</month>
          <day>12</day>
          <volume>1</volume>
          <issue>1</issue>
          <fpage>13</fpage>
          <lpage>29</lpage>
          <pub-id pub-id-type="doi">10.1038/s44159-021-00006-y</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Krist</surname>
              <given-names>AH</given-names>
            </name>
            <name name-style="western">
              <surname>Tong</surname>
              <given-names>ST</given-names>
            </name>
            <name name-style="western">
              <surname>Aycock</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Longo</surname>
              <given-names>DR</given-names>
            </name>
          </person-group>
          <article-title>Engaging patients in decision-making and behavior change to promote prevention</article-title>
          <source>Stud Health Technol Inform</source>
          <year>2017</year>
          <volume>240</volume>
          <fpage>284</fpage>
          <lpage>302</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/28972524"/>
          </comment>
          <pub-id pub-id-type="medline">28972524</pub-id>
          <pub-id pub-id-type="pmcid">PMC6996004</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Swire-Thompson</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Lazer</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Public health and online misinformation: challenges and recommendations</article-title>
          <source>Annu Rev Public Health</source>
          <year>2020</year>
          <month>04</month>
          <day>02</day>
          <volume>41</volume>
          <issue>1</issue>
          <fpage>433</fpage>
          <lpage>451</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.annualreviews.org/doi/abs/10.1146/annurev-publhealth-040119-094127?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub++0pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1146/annurev-publhealth-040119-094127</pub-id>
          <pub-id pub-id-type="medline">31874069</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sundin</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Lewandowski</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Haider</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Whose relevance? Web search engines as multisided relevance machines</article-title>
          <source>J Assoc Inf Sci Technol</source>
          <year>2021</year>
          <month>08</month>
          <day>21</day>
          <volume>73</volume>
          <issue>5</issue>
          <fpage>637</fpage>
          <lpage>642</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1002/asi.24570"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/asi.24570</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sullivan</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>How Google delivers reliable information in Search</article-title>
          <source>Google</source>
          <year>2020</year>
          <month>09</month>
          <day>10</day>
          <access-date>2024-04-18</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://blog.google/products/search/how-google-delivers-reliable-information-search/">https://blog.google/products/search/how-google-delivers-reliable-information-search/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Di Sotto</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Viviani</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Health misinformation detection in the social web: an overview and a data science approach</article-title>
          <source>Int J Environ Res Public Health</source>
          <year>2022</year>
          <month>02</month>
          <day>15</day>
          <volume>19</volume>
          <issue>4</issue>
          <fpage>2173</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=ijerph19042173"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/ijerph19042173</pub-id>
          <pub-id pub-id-type="medline">35206359</pub-id>
          <pub-id pub-id-type="pii">ijerph19042173</pub-id>
          <pub-id pub-id-type="pmcid">PMC8872515</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sylvia Chou</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Gaysynsky</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cappella</surname>
              <given-names>JN</given-names>
            </name>
          </person-group>
          <article-title>Where we go from here: health misinformation on social media</article-title>
          <source>Am J Public Health</source>
          <year>2020</year>
          <month>10</month>
          <volume>110</volume>
          <issue>S3</issue>
          <fpage>S273</fpage>
          <lpage>S275</lpage>
          <pub-id pub-id-type="doi">10.2105/ajph.2020.305905</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kickbusch</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Health literacy: addressing the health and education divide</article-title>
          <source>Health Promot Int</source>
          <year>2001</year>
          <month>09</month>
          <volume>16</volume>
          <issue>3</issue>
          <fpage>289</fpage>
          <lpage>97</lpage>
          <pub-id pub-id-type="doi">10.1093/heapro/16.3.289</pub-id>
          <pub-id pub-id-type="medline">11509466</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Suarez-Lledo</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Alvarez-Galvez</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Prevalence of health misinformation on social media: systematic review</article-title>
          <source>J Med Internet Res</source>
          <year>2021</year>
          <month>01</month>
          <day>20</day>
          <volume>23</volume>
          <issue>1</issue>
          <fpage>e17187</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2021/1/e17187/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/17187</pub-id>
          <pub-id pub-id-type="medline">33470931</pub-id>
          <pub-id pub-id-type="pii">v23i1e17187</pub-id>
          <pub-id pub-id-type="pmcid">PMC7857950</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Eysenbach</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>How to fight an infodemic: the four pillars of infodemic management</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>06</month>
          <day>29</day>
          <volume>22</volume>
          <issue>6</issue>
          <fpage>e21820</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/6/e21820/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/21820</pub-id>
          <pub-id pub-id-type="medline">32589589</pub-id>
          <pub-id pub-id-type="pii">v22i6e21820</pub-id>
          <pub-id pub-id-type="pmcid">PMC7332253</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Burki</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Vaccine misinformation and social media</article-title>
          <source>The Lancet Digital Health</source>
          <year>2019</year>
          <month>10</month>
          <volume>1</volume>
          <issue>6</issue>
          <fpage>e258</fpage>
          <lpage>e259</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1016/S2589-7500(19)30136-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/s2589-7500(19)30136-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lotto</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sá Menezes</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Zakir Hussain</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Tsao</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ahmad Butt</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>P Morita</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Cruvinel</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Characterization of false or misleading fluoride content on Instagram: infodemiology study</article-title>
          <source>J Med Internet Res</source>
          <year>2022</year>
          <month>05</month>
          <day>19</day>
          <volume>24</volume>
          <issue>5</issue>
          <fpage>e37519</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2022/5/e37519/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/37519</pub-id>
          <pub-id pub-id-type="medline">35588055</pub-id>
          <pub-id pub-id-type="pii">v24i5e37519</pub-id>
          <pub-id pub-id-type="pmcid">PMC9164089</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mackey</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Purushothaman</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Haupt</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Nali</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Application of unsupervised machine learning to identify and characterise hydroxychloroquine misinformation on Twitter</article-title>
          <source>The Lancet Digital Health</source>
          <year>2021</year>
          <month>02</month>
          <volume>3</volume>
          <issue>2</issue>
          <fpage>e72</fpage>
          <lpage>e75</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1016/S2589-7500(20)30318-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/s2589-7500(20)30318-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nsoesie</surname>
              <given-names>EO</given-names>
            </name>
            <name name-style="western">
              <surname>Cesare</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Müller</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ozonoff</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>COVID-19 misinformation spread in eight countries: exponential growth modeling study</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>12</month>
          <day>15</day>
          <volume>22</volume>
          <issue>12</issue>
          <fpage>e24425</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/12/e24425/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/24425</pub-id>
          <pub-id pub-id-type="medline">33264102</pub-id>
          <pub-id pub-id-type="pii">v22i12e24425</pub-id>
          <pub-id pub-id-type="pmcid">PMC7744144</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Upadhyay</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Pasi</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Viviani</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Health misinformation detection in web content: a structural-, content-based, and context-aware approach based on Web2Vec</article-title>
          <source>GoodIT '21: Proceedings of the Conference on Information Technology for Social Good</source>
          <year>2021</year>
          <month>09</month>
          <fpage>19</fpage>
          <lpage>24</lpage>
          <pub-id pub-id-type="doi">10.1145/3462203.3475898</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hesse</surname>
              <given-names>BW</given-names>
            </name>
            <name name-style="western">
              <surname>Nelson</surname>
              <given-names>DE</given-names>
            </name>
            <name name-style="western">
              <surname>Kreps</surname>
              <given-names>GL</given-names>
            </name>
            <name name-style="western">
              <surname>Croyle</surname>
              <given-names>RT</given-names>
            </name>
            <name name-style="western">
              <surname>Arora</surname>
              <given-names>NK</given-names>
            </name>
            <name name-style="western">
              <surname>Rimer</surname>
              <given-names>BK</given-names>
            </name>
            <name name-style="western">
              <surname>Viswanath</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Trust and sources of health information: the impact of the Internet and its implications for health care providers: findings from the first Health Information National Trends Survey</article-title>
          <source>Arch Intern Med</source>
          <year>2005</year>
          <volume>165</volume>
          <issue>22</issue>
          <fpage>2618</fpage>
          <lpage>24</lpage>
          <pub-id pub-id-type="doi">10.1001/archinte.165.22.2618</pub-id>
          <pub-id pub-id-type="medline">16344419</pub-id>
          <pub-id pub-id-type="pii">165/22/2618</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van der Linden</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Misinformation: susceptibility, spread, and interventions to immunize the public</article-title>
          <source>Nat Med</source>
          <year>2022</year>
          <month>03</month>
          <day>10</day>
          <volume>28</volume>
          <issue>3</issue>
          <fpage>460</fpage>
          <lpage>467</lpage>
          <pub-id pub-id-type="doi">10.1038/s41591-022-01713-6</pub-id>
          <pub-id pub-id-type="medline">35273402</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41591-022-01713-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pogacar</surname>
              <given-names>FA</given-names>
            </name>
            <name name-style="western">
              <surname>Ghenai</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Smucker</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Clarke</surname>
              <given-names>CLA</given-names>
            </name>
          </person-group>
          <article-title>The positive and negative influence of search results on people's decisions about the efficacy of medical treatments</article-title>
          <source>ICTIR '17: Proceedings of the ACM SIGIR International Conference on Theory of Information Retrieval</source>
          <year>2017</year>
          <month>10</month>
          <fpage>209</fpage>
          <lpage>216</lpage>
          <pub-id pub-id-type="doi">10.1145/3121050.3121074</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Upadhyay</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Pasi</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Viviani</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>An overview on evaluation labs and open issues in health-related credible information retrieval</article-title>
          <source>Proceedings of the 11th Italian Information Retrieval Workshop 2021</source>
          <year>2021</year>
          <fpage>1</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ceur-ws.org/Vol-2947/paper31.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Suominen</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Kelly</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Goeuriot</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Krallinger</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>CLEF eHealth Evaluation Lab 2020</article-title>
          <source>Advances in Information Retrieval</source>
          <year>2020</year>
          <volume>12036</volume>
          <fpage>587</fpage>
          <lpage>594</lpage>
          <pub-id pub-id-type="doi">10.1007/978-3-030-45442-5_76</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Clarke</surname>
              <given-names>CLA</given-names>
            </name>
            <name name-style="western">
              <surname>Maistro</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Smucker</surname>
              <given-names>MD</given-names>
            </name>
          </person-group>
          <article-title>Overview of the TREC 2021 Health Misinformation Track</article-title>
          <source>NIST Special Publication: NIST SP 500-335: The Thirtieth Text REtrieval Conference (TREC 2021) Proceedings</source>
          <year>2022</year>
          <fpage>1</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://trec.nist.gov/pubs/trec30/papers/Overview-HM.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Solainayagi</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Ponnusamy</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Trustworthy media news content retrieval from web using truth content discovery algorithm</article-title>
          <source>Cognitive Systems Research</source>
          <year>2019</year>
          <month>08</month>
          <volume>56</volume>
          <fpage>26</fpage>
          <lpage>35</lpage>
          <pub-id pub-id-type="doi">10.1016/j.cogsys.2019.01.002</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Qin</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Ren</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Truth discovery with memory network</article-title>
          <source>Tsinghua Science and Technology</source>
          <year>2017</year>
          <month>12</month>
          <volume>22</volume>
          <issue>6</issue>
          <fpage>609</fpage>
          <lpage>618</lpage>
          <pub-id pub-id-type="doi">10.23919/tst.2017.8195344</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Gupta</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Pradeep</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Nogueira</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Fang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Covidex: neural ranking models and keyword search infrastructure for the COVID-19 open research dataset</article-title>
          <source>Proceedings of the First Workshop on Scholarly Document Processing</source>
          <year>2020</year>
          <fpage>31</fpage>
          <lpage>41</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/2020.sdp-1.5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Teodoro</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ferdowsi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Borissov</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Kashani</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Vicente Alvarez</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Copara</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gouareb</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Naderi</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Amini</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Information retrieval in an infodemic: the case of COVID-19 publications</article-title>
          <source>J Med Internet Res</source>
          <year>2021</year>
          <month>09</month>
          <day>17</day>
          <volume>23</volume>
          <issue>9</issue>
          <fpage>e30161</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://boris.unibe.ch/id/eprint/158358"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/30161</pub-id>
          <pub-id pub-id-type="medline">34375298</pub-id>
          <pub-id pub-id-type="pii">v23i9e30161</pub-id>
          <pub-id pub-id-type="pmcid">PMC8451964</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fernández-Pichel</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Losada</surname>
              <given-names>DE</given-names>
            </name>
            <name name-style="western">
              <surname>Pichel</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Elsweiler</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Candan</surname>
              <given-names>KS</given-names>
            </name>
          </person-group>
          <article-title>Comparing Traditional and Neural Approaches for Detecting Health-Related Misinformation</article-title>
          <source>Experimental IR Meets Multilinguality, Multimodality, and Interaction. CLEF 2021. Lecture Notes in Computer Science, vol 12880</source>
          <year>2021</year>
          <publisher-loc>Cham, Switzerland</publisher-loc>
          <publisher-name>Springer International Publishing</publisher-name>
          <fpage>78</fpage>
          <lpage>90</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lima</surname>
              <given-names>LC</given-names>
            </name>
            <name name-style="western">
              <surname>Wright</surname>
              <given-names>DB</given-names>
            </name>
            <name name-style="western">
              <surname>Augenstein</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Maistro</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>University of Copenhagen participation in TREC Health Misinformation Track 2020</article-title>
          <source>NIST Special Publication: NIST SP 1266: The Twenty-Ninth Text REtrieval Conference (TREC 2020) Proceedings</source>
          <year>2021</year>
          <fpage>1</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://trec.nist.gov/pubs/trec29/papers/KU.HM.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pradeep</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Nogueira</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Vera: prediction techniques for reducing harmful misinformation in consumer health search</article-title>
          <source>SIGIR '21: Proceedings of the 44th International ACM SIGIR Conference on Research and Development in Information Retrieval</source>
          <year>2021</year>
          <month>07</month>
          <fpage>2066</fpage>
          <lpage>2070</lpage>
          <pub-id pub-id-type="doi">10.1145/3404835.3463120</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cormack</surname>
              <given-names>GV</given-names>
            </name>
            <name name-style="western">
              <surname>Clarke</surname>
              <given-names>CLA</given-names>
            </name>
            <name name-style="western">
              <surname>Buettcher</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Reciprocal rank fusion outperforms condorcet and individual rank learning methods</article-title>
          <source>SIGIR '09: Proceedings of the 32nd international ACM SIGIR conference on Research and development in information retrieval</source>
          <year>2009</year>
          <fpage>758</fpage>
          <lpage>759</lpage>
          <pub-id pub-id-type="doi">10.1145/1571941.1572114</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Abualsaud</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lioma</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Maistro</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Smucker</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zuccon</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Overview of the TREC 2019 Decision Track</article-title>
          <source>NIST Special Publication: SP 500-331: The Twenty-Eighth Text REtrieval Conference (TREC 2019) Proceedings</source>
          <year>2020</year>
          <fpage>1</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://trec.nist.gov/pubs/trec28/papers/OVERVIEW.D.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Naderi</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Jaume-Santero</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Teodoro</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>DS4DH at TREC Health Misinformation 2021: multi-dimensional ranking models with transfer learning and rank fusion</article-title>
          <source>NIST Special Publication: NIST SP 500-335: The Thirtieth Text REtrieval Conference (TREC 2021) Proceedings</source>
          <year>2022</year>
          <fpage>1</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://trec.nist.gov/pubs/trec30/papers/DigiLab-HM.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Clarke</surname>
              <given-names>CLA</given-names>
            </name>
            <name name-style="western">
              <surname>Rizvi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Smucker</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Maistro</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zuccon</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Overview of the TREC 2020 Health Misinformation Track</article-title>
          <source>NIST Special Publication: NIST SP 1266: The Twenty-Ninth Text REtrieval Conference (TREC 2020) Proceedings</source>
          <year>2021</year>
          <fpage>1</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://trec.nist.gov/pubs/trec29/papers/OVERVIEW.HM.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="web">
          <source>National Institute of Standards and Technology</source>
          <access-date>2024-04-18</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.nist.gov/">https://www.nist.gov/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Raffel</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Shazeer</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Narang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Matena</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>PJ</given-names>
            </name>
          </person-group>
          <article-title>Exploring the limits of transfer learning with a unified text-to-text transformer</article-title>
          <source>Journal of Machine Learning Research</source>
          <year>2020</year>
          <volume>21</volume>
          <issue>140</issue>
          <fpage>1</fpage>
          <lpage>67</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jmlr.org/papers/volume21/20-074/20-074.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="web">
          <source>Common Crawl</source>
          <access-date>2024-04-18</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://commoncrawl.org/">https://commoncrawl.org/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Robertson</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zaragoza</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>The probabilistic relevance framework: BM25 and beyond</article-title>
          <source>Foundations and Trends in Information Retrieval</source>
          <year>2009</year>
          <month>04</month>
          <volume>3</volume>
          <issue>4</issue>
          <fpage>333</fpage>
          <lpage>389</lpage>
          <pub-id pub-id-type="doi">10.1561/1500000019</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Yates</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>MacAvaney</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>PARADE: Passage Representation Aggregation for Document Reranking</article-title>
          <source>ACM Transactions on Information Systems</source>
          <year>2023</year>
          <month>09</month>
          <day>27</day>
          <volume>42</volume>
          <issue>2</issue>
          <fpage>1</fpage>
          <lpage>26</lpage>
          <pub-id pub-id-type="doi">10.1145/3600088</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nogueira</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Multi-stage document ranking with BERT</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on October 31, 2019</comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.1910.14424</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Clark</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Luong</surname>
              <given-names>MH</given-names>
            </name>
            <name name-style="western">
              <surname>Le</surname>
              <given-names>QV</given-names>
            </name>
            <name name-style="western">
              <surname>Manning</surname>
              <given-names>CD</given-names>
            </name>
          </person-group>
          <article-title>Electra: Pre-training text encoders as discriminators rather than generators</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on March 23, 2020</comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2003.10555</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Jeong</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Green</surname>
              <given-names>PA</given-names>
            </name>
          </person-group>
          <article-title>How consistent are the best-known readability equations in estimating the readability of design standards?</article-title>
          <source>IEEE Trans. Profess. Commun</source>
          <year>2017</year>
          <month>3</month>
          <volume>60</volume>
          <issue>1</issue>
          <fpage>97</fpage>
          <lpage>111</lpage>
          <pub-id pub-id-type="doi">10.1109/tpc.2016.2635720</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Grabeel</surname>
              <given-names>KL</given-names>
            </name>
            <name name-style="western">
              <surname>Russomanno</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Oelschlegel</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tester</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Heidel</surname>
              <given-names>RE</given-names>
            </name>
          </person-group>
          <article-title>Computerized versus hand-scored health literacy tools: a comparison of Simple Measure of Gobbledygook (SMOG) and Flesch-Kincaid in printed patient education materials</article-title>
          <source>J Med Libr Assoc</source>
          <year>2018</year>
          <month>01</month>
          <day>12</day>
          <volume>106</volume>
          <issue>1</issue>
          <fpage>38</fpage>
          <lpage>45</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/29339932"/>
          </comment>
          <pub-id pub-id-type="doi">10.5195/jmla.2018.262</pub-id>
          <pub-id pub-id-type="medline">29339932</pub-id>
          <pub-id pub-id-type="pii">jmla-106-38</pub-id>
          <pub-id pub-id-type="pmcid">PMC5764592</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="web">
          <article-title>getPageRank</article-title>
          <source>OpenPageRank</source>
          <access-date>2024-04-18</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.domcop.com/openpagerank/documentation">https://www.domcop.com/openpagerank/documentation</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Boyer</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Selby</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Scherrer</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Appel</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>The Health On the Net Code of Conduct for medical and health websites</article-title>
          <source>Comput Biol Med</source>
          <year>1998</year>
          <month>09</month>
          <volume>28</volume>
          <issue>5</issue>
          <fpage>603</fpage>
          <lpage>10</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1016/S0010-4825(98)00037-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/s0010-4825(98)00037-7</pub-id>
          <pub-id pub-id-type="medline">9861515</pub-id>
          <pub-id pub-id-type="pii">S0010-4825(98)00037-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bajaj</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Campos</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Craswell</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Majumder</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>McNamara</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mitra</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Rosenberg</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Stoica</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Tiwary</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>MS MARCO: a human generated machine reading comprehension dataset</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on October 31, 2018</comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.1611.09268</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ott</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Goyal</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Du</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Joshi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Levy</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zettlemoyer</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Stoyanov</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>RoBERTa: a robustly optimized BERT pretraining approach</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on July 26, 2019</comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.1907.11692</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gururangan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Marasović</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Swayamdipta</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lo</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Beltagy</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Downey</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Don’t stop pretraining: adapt language models to domains and tasks</article-title>
          <source>Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics</source>
          <year>2020</year>
          <fpage>8342</fpage>
          <lpage>8360</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2020.acl-main.740"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/2020.acl-main.740</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Beltagy</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Lo</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Cohan</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>SciBERT: a pretrained language model for scientific text</article-title>
          <source>Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)</source>
          <year>2019</year>
          <fpage>3615</fpage>
          <lpage>3620</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/d19-1371</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Aly</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Schlichtkrull</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Thorne</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Vlachos</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Christodoulopoulos</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Cocarascu</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Mittal</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>The fact extraction and verification over unstructured and structured information (FEVEROUS) shared task</article-title>
          <source>Proceedings of the Fourth Workshop on Fact Extraction and VERification (FEVER)</source>
          <year>2021</year>
          <fpage>1</fpage>
          <lpage>13</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/2021.fever-1.1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wadden</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lo</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>van Zuylen</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Cohan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hajishirzi</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Fact or fiction: verifying scientific claims</article-title>
          <source>Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)</source>
          <year>2020</year>
          <fpage>7534</fpage>
          <lpage>7550</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/2020.emnlp-main.609</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stammbach</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Ash</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>The choice of textual knowledge base in automated claim checking</article-title>
          <source>Journal of Data and Information Quality</source>
          <year>2023</year>
          <volume>15</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <lpage>22</lpage>
          <pub-id pub-id-type="doi">10.1145/3561389</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schwarz</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Morris</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Augmenting web pages and search results to support credibility assessment</article-title>
          <source>CHI '11: Proceedings of the SIGCHI Conference on Human Factors in Computing Systems</source>
          <year>2011</year>
          <fpage>1245</fpage>
          <lpage>1254</lpage>
          <pub-id pub-id-type="doi">10.1145/1978942.1979127</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Olteanu</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Peshterliev</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Aberer</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Serdyukov</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Braslavski</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Kuznetsov</surname>
              <given-names>SO</given-names>
            </name>
            <name name-style="western">
              <surname>Kamps</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Rüger</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Agichtein</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Segalovich</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Yilmaz</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Web credibility: Features exploration and credibility prediction</article-title>
          <source>Advances in Information Retrieval. ECIR 2013. Lecture Notes in Computer Science, vol 7814</source>
          <year>2013</year>
          <publisher-loc>Berlin, Germany</publisher-loc>
          <publisher-name>Springer</publisher-name>
          <fpage>557</fpage>
          <lpage>568</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yoon</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>So</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>BioBERT: a pre-trained biomedical language representation model for biomedical text mining</article-title>
          <source>Bioinformatics</source>
          <year>2020</year>
          <month>02</month>
          <day>15</day>
          <volume>36</volume>
          <issue>4</issue>
          <fpage>1234</fpage>
          <lpage>1240</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/31501885"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bioinformatics/btz682</pub-id>
          <pub-id pub-id-type="medline">31501885</pub-id>
          <pub-id pub-id-type="pii">5566506</pub-id>
          <pub-id pub-id-type="pmcid">PMC7703786</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zaheer</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Guruganesh</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Dubey</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ainslie</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Alberti</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ontanon</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Pham</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Ravula</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Ahmed</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Big Bird: transformers for longer sequences</article-title>
          <source>Proceedings of the 34th Conference on Neural Information Processing Systems (NeurIPS 2020)</source>
          <year>2020</year>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://proceedings.neurips.cc/paper_files/paper/2020/file/c8512d142a2d849725f31a9a7a361ab9-Paper.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Tahami</surname>
              <given-names>AV</given-names>
            </name>
            <name name-style="western">
              <surname>Abualsaud</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Smucker</surname>
              <given-names>MD</given-names>
            </name>
          </person-group>
          <article-title>Learning trustworthy web sources to derive correct answers and reduce health misinformation in search</article-title>
          <source>SIGIR '22: Proceedings of the 45th International ACM SIGIR Conference on Research and Development in Information Retrieval</source>
          <year>2022</year>
          <fpage>2099</fpage>
          <lpage>2104</lpage>
          <pub-id pub-id-type="doi">10.1145/3477495.3531812</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="web">
          <article-title>The ClueWeb12 Dataset</article-title>
          <source>The Lemur Project</source>
          <access-date>2024-04-18</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://lemurproject.org/clueweb12/">http://lemurproject.org/clueweb12/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zuccon</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Palotti</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Goeuriot</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Kelly</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lupu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Pecina</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Müller</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Daher</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Deacon</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>The IR Task at the CLEF eHealth evaluation lab 2016: User-centred health information retrieval</article-title>
          <year>2016</year>
          <conf-name>CLEF 2016 - Conference and Labs of the Evaluation Forum</conf-name>
          <conf-date>September 5-8, 2016</conf-date>
          <conf-loc>Évora, Portugal</conf-loc>
          <fpage>255</fpage>
          <lpage>266</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ceur-ws.org/Vol-1609/16090015.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bennani-Smires</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Musat</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hossmann</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Baeriswyl</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Jaggi</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Simple unsupervised keyphrase extraction using sentence embeddings</article-title>
          <source>Proceedings of the 22nd Conference on Computational Natural Language Learning</source>
          <year>2018</year>
          <fpage>221</fpage>
          <lpage>229</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/K18-1022</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref62">
        <label>62</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ogilvie</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Callan</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Combining document representations for known-item search</article-title>
          <source>SIGIR '03: Proceedings of the 26th annual international ACM SIGIR conference on Research and development in information retrieval</source>
          <year>2003</year>
          <fpage>143</fpage>
          <lpage>150</lpage>
          <pub-id pub-id-type="doi">10.1145/860462.860463</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref63">
        <label>63</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Webber</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Moffat</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zobel</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>A similarity measure for indefinite rankings</article-title>
          <source>ACM Transactions on Information Systems</source>
          <year>2010</year>
          <month>11</month>
          <day>23</day>
          <volume>28</volume>
          <issue>4</issue>
          <fpage>1</fpage>
          <lpage>38</lpage>
          <pub-id pub-id-type="doi">10.1145/1852102.1852106</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref64">
        <label>64</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Clarke</surname>
              <given-names>CLA</given-names>
            </name>
            <name name-style="western">
              <surname>Smucker</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Vtyurina</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Offline evaluation by maximum similarity to an ideal ranking</article-title>
          <source>CIKM '20: Proceedings of the 29th ACM International Conference on Information &#38; Knowledge Management</source>
          <year>2020</year>
          <fpage>225</fpage>
          <lpage>234</lpage>
          <pub-id pub-id-type="doi">10.1145/3340531.3411915</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref65">
        <label>65</label>
        <nlm-citation citation-type="web">
          <source>Hugging Face</source>
          <access-date>2024-04-18</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://huggingface.co">https://huggingface.co</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref66">
        <label>66</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Naderi</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Mishra</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Teodoro</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Online health search via multi-dimensional information quality assessment based on deep language models</article-title>
          <source>medRxiv</source>
          <comment>Preprint posted online on January 11, 2024</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.medrxiv.org/content/10.1101/2023.04.11.22281038v2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1101/2023.04.11.22281038</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref67">
        <label>67</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pradeep</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Nogueira</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Scientific claim verification with VerT5erini</article-title>
          <source>Proceedings of the 12th International Workshop on Health Text Mining and Information Analysis</source>
          <year>2021</year>
          <fpage>94</fpage>
          <lpage>103</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2021.louhi-1.11.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref68">
        <label>68</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Abualsaud</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>IX</given-names>
            </name>
            <name name-style="western">
              <surname>Ghajar</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Minh</surname>
              <given-names>LNP</given-names>
            </name>
            <name name-style="western">
              <surname>Smucker</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Tahami</surname>
              <given-names>AV</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>UWaterlooMDS at the TREC 2021 Health Misinformation Track</article-title>
          <source>NIST Special Publication 500-335: The Thirtieth Text REtrieval Conference Proceedings (TREC 2021)</source>
          <year>2022</year>
          <fpage>1</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://trec.nist.gov/pubs/trec30/papers/UwaterlooMDS-HM.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref69">
        <label>69</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schlicht</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Paula</surname>
              <given-names>AD</given-names>
            </name>
            <name name-style="western">
              <surname>Rosso</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>UPV at TREC Health Misinformation Track 2021 ranking with SBERT and quality</article-title>
          <source>NIST Special Publication 500-335: The Thirtieth Text REtrieval Conference Proceedings (TREC 2021)</source>
          <year>2022</year>
          <fpage>1</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://trec.nist.gov/pubs/trec30/papers/UPV-HM.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref70">
        <label>70</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fernández-Pichel</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Prada-Corral</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Losada</surname>
              <given-names>DE</given-names>
            </name>
            <name name-style="western">
              <surname>Pichel</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Gamallo</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>CiTIUS at the TREC 2021 Health Misinformation Track</article-title>
          <source>NIST Special Publication 500-335: The Thirtieth Text REtrieval Conference Proceedings (TREC 2021)</source>
          <year>2022</year>
          <fpage>1</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://trec.nist.gov/pubs/trec30/papers/CiTIUS-HM.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref71">
        <label>71</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Belkin</surname>
              <given-names>NJ</given-names>
            </name>
            <name name-style="western">
              <surname>Kantor</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Fox</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>Shaw</surname>
              <given-names>JA</given-names>
            </name>
          </person-group>
          <article-title>Combining the evidence of multiple query representations for information retrieval</article-title>
          <source>Information Processing &#38; Management</source>
          <year>1995</year>
          <month>5</month>
          <volume>31</volume>
          <issue>3</issue>
          <fpage>431</fpage>
          <lpage>448</lpage>
          <pub-id pub-id-type="doi">10.1016/0306-4573(94)00057-A</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref72">
        <label>72</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bondarenko</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Fröbe</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gohsen</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Günther</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kiesel</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Schwerter</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Syed</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Völske</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Potthast</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Stein</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Hagen</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Webis at TREC 2021: Deep Learning, Health Misinformation, and Podcasts Tracks</article-title>
          <source>NIST Special Publication 500-335: The Thirtieth Text REtrieval Conference Proceedings (TREC 2021)</source>
          <year>2022</year>
          <fpage>1</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://trec.nist.gov/pubs/trec30/papers/Webis-DL-HM-Pod.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref73">
        <label>73</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Teodoro</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Mottin</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Gobeill</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gaudinat</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Vachon</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ruch</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Improving average ranking precision in user searches for biomedical research datasets</article-title>
          <source>Database (Oxford)</source>
          <year>2017</year>
          <month>01</month>
          <day>01</day>
          <volume>2017</volume>
          <fpage>bax083</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/29220475"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/database/bax083</pub-id>
          <pub-id pub-id-type="medline">29220475</pub-id>
          <pub-id pub-id-type="pii">4600047</pub-id>
          <pub-id pub-id-type="pmcid">PMC5714153</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref74">
        <label>74</label>
        <nlm-citation citation-type="web">
          <article-title>2021 Health Misinformation Track</article-title>
          <source>TREC</source>
          <year>2022</year>
          <access-date>2024-04-18</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://trec.nist.gov/data/misinfo2021.html">https://trec.nist.gov/data/misinfo2021.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
