<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.0" xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR AI</journal-id>
      <journal-title>JMIR AI</journal-title>
      <issn pub-type="epub">2817-1705</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v2i1e40167</article-id>
      <article-id pub-id-type="pmid">38464947</article-id>
      <article-id pub-id-type="doi">10.2196/40167</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Application of Artificial Intelligence to the Monitoring of Medication Adherence for Tuberculosis Treatment in Africa: Algorithm Development and Validation</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>El Emam</surname>
            <given-names>Khaled</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Klement</surname>
            <given-names>William</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Su</surname>
            <given-names>Zhaohui</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Sekandi</surname>
            <given-names>Juliet Nabbuye</given-names>
          </name>
          <degrees>MD, MSc, DrPH</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <address>
            <institution>Global Health Institute</institution>
            <institution>College of Public Health</institution>
            <institution>University of Georgia</institution>
            <addr-line>100 Foster Road</addr-line>
            <addr-line>Athens, GA, 30602</addr-line>
            <country>United States</country>
            <phone>1 706 542 5257</phone>
            <email>jsekandi@uga.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2430-1396</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Shi</surname>
            <given-names>Weili</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9907-2009</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Zhu</surname>
            <given-names>Ronghang</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1035-9044</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Kaggwa</surname>
            <given-names>Patrick</given-names>
          </name>
          <degrees>BSc</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1785-307X</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Mwebaze</surname>
            <given-names>Ernest</given-names>
          </name>
          <degrees>BSc, MSc, PhD</degrees>
          <xref rid="aff6" ref-type="aff">6</xref>
          <xref rid="aff7" ref-type="aff">7</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1305-6367</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Li</surname>
            <given-names>Sheng</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1205-8632</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Epidemiology and Biostatistics</institution>
        <institution>College of Public Health</institution>
        <institution>University of Georgia</institution>
        <addr-line>Athens, GA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Global Health Institute</institution>
        <institution>College of Public Health</institution>
        <institution>University of Georgia</institution>
        <addr-line>Athens, GA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>School of Data Science</institution>
        <institution>University of Virginia</institution>
        <addr-line>Charlottesville, VA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>School of Computing</institution>
        <institution>College of Engineering &amp; Franklin College of Arts and Sciences</institution>
        <institution>University of Georgia</institution>
        <addr-line>Athens, GA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>Department of Epidemiology and Biostatistics</institution>
        <institution>School of Public Health</institution>
        <institution>Makerere University</institution>
        <addr-line>Kampala</addr-line>
        <country>Uganda</country>
      </aff>
      <aff id="aff6">
        <label>6</label>
        <institution>Sunbird AI</institution>
        <addr-line>Kampala</addr-line>
        <country>Uganda</country>
      </aff>
      <aff id="aff7">
        <label>7</label>
        <institution>Artificial Intelligence Research Lab</institution>
        <institution>College of Computing and Information Science</institution>
        <institution>Makerere University</institution>
        <addr-line>Kampala</addr-line>
        <country>Uganda</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Juliet Nabbuye Sekandi <email>jsekandi@uga.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2023</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>23</day>
        <month>2</month>
        <year>2023</year>
      </pub-date>
      <volume>2</volume>
      <elocation-id>e40167</elocation-id>
      <history>
        <date date-type="received">
          <day>8</day>
          <month>6</month>
          <year>2022</year>
        </date>
        <date date-type="rev-request">
          <day>19</day>
          <month>7</month>
          <year>2022</year>
        </date>
        <date date-type="rev-recd">
          <day>17</day>
          <month>9</month>
          <year>2022</year>
        </date>
        <date date-type="accepted">
          <day>22</day>
          <month>1</month>
          <year>2023</year>
        </date>
      </history>
      <copyright-statement>©Juliet Nabbuye Sekandi, Weili Shi, Ronghang Zhu, Patrick Kaggwa, Ernest Mwebaze, Sheng Li. Originally published in JMIR AI (https://ai.jmir.org), 23.02.2023.</copyright-statement>
      <copyright-year>2023</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR AI, is properly cited. The complete bibliographic information, a link to the original publication on https://www.ai.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://ai.jmir.org/2023/1/e40167" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Artificial intelligence (AI) applications based on advanced deep learning methods in image recognition tasks can increase efficiency in the monitoring of medication adherence through automation. AI has sparsely been evaluated for the monitoring of medication adherence in clinical settings. However, AI has the potential to transform the way health care is delivered even in limited-resource settings such as Africa.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>We aimed to pilot the development of a deep learning model for simple binary classification and confirmation of proper medication adherence to enhance efficiency in the use of video monitoring of patients in tuberculosis treatment.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We used a secondary data set of 861 video images of medication intake that were collected from consenting adult patients with tuberculosis in an institutional review board–approved study evaluating video-observed therapy in Uganda. The video images were processed through a series of steps to prepare them for use in a training model. First, we annotated videos using a specific protocol to eliminate those with poor quality. After the initial annotation step, 497 videos had sufficient quality for training the models. Among them, 405 were positive samples, whereas 92 were negative samples. With some preprocessing techniques, we obtained 160 frames with a size of 224 × 224 in each video. We used a deep learning framework that leveraged 4 convolutional neural networks models to extract visual features from the video frames and automatically perform binary classification of adherence or nonadherence. We evaluated the diagnostic properties of the different models using sensitivity, specificity, <italic>F</italic><sub>1</sub>-score, and precision. The area under the curve (AUC) was used to assess the discriminative performance and the speed per video review as a metric for model efficiency. We conducted a 5-fold internal cross-validation to determine the diagnostic and discriminative performance of the models. We did not conduct external validation due to a lack of publicly available data sets with specific medication intake video frames.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Diagnostic properties and discriminative performance from internal cross-validation were moderate to high in the binary classification tasks with 4 selected automated deep learning models. The sensitivity ranged from 92.8% to 95.8%, specificity from 43.5% to 55.4%, <italic>F</italic><sub>1</sub>-score from 0.91 to 0.92, precision from 88% to 90.1%, and AUC from 0.78 to 0.85. The 3D ResNet model had the highest precision, AUC, and speed.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>All 4 deep learning models showed comparable diagnostic properties and discriminative performance. The findings serve as a reasonable proof of concept to support the potential application of AI in the binary classification of video frames to predict medication adherence.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>artificial intelligence</kwd>
        <kwd>deep learning</kwd>
        <kwd>machine learning</kwd>
        <kwd>medication adherence</kwd>
        <kwd>digital technology</kwd>
        <kwd>digital health</kwd>
        <kwd>tuberculosis</kwd>
        <kwd>video directly observed therapy</kwd>
        <kwd>video therapy</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Tuberculosis (TB) is a leading cause of death worldwide, with an estimated 10.6 million new cases of the disease and 1.7 million patients dying in 2021 [<xref ref-type="bibr" rid="ref1">1</xref>]. The global <italic>End TB</italic> strategy set goals to eliminate disease, deaths, and burden by 2030 [<xref ref-type="bibr" rid="ref2">2</xref>], but these could be out of reach if critical gaps in diagnosis, treatment, and care are not addressed. Medication adherence, defined as the extent to which a person’s behavior regarding medication corresponds with agreed recommendations from a health care provider, is one of the barriers to TB control [<xref ref-type="bibr" rid="ref3">3</xref>]. It is estimated that 33% to 50% of patients who start treatment become nonadherent to their prescribed medication regimens [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>]. Nonadherence is associated with the emergence of drug resistance, prolonged infectiousness, treatment failure, and death, especially in the context of TB and HIV coinfection [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. The existing interventions to mitigate poor medication adherence have limited effectiveness for a variety of reasons [<xref ref-type="bibr" rid="ref5">5</xref>]. In Africa, a high patient load coupled with a severe shortage of health workers hampers proper monitoring and support of patients on TB treatment [<xref ref-type="bibr" rid="ref8">8</xref>]. Digital adherence technologies have rapidly emerged as tools for improving the delivery of care in a variety of health care settings [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref9">9</xref>]. 
In 2017, the World Health Organization endorsed the use of video-based directly observed therapy (VDOT) as a suitable alternative to directly observed therapy for monitoring TB treatment and published guidance on its implementation [<xref ref-type="bibr" rid="ref10">10</xref>]. VDOT overcomes geographic barriers because it enables the health providers to view patients’ medication intake activity remotely, especially in the hard-to-reach populations [<xref ref-type="bibr" rid="ref11">11</xref>-<xref ref-type="bibr" rid="ref13">13</xref>]. It also enhances autonomy since patients can choose when and where they take their TB medications [<xref ref-type="bibr" rid="ref14">14</xref>-<xref ref-type="bibr" rid="ref16">16</xref>]. The limitation with asynchronous VDOT is the repetitive manual task of reviewing videos and confirming daily adherence [<xref ref-type="bibr" rid="ref17">17</xref>]. Moreover, such classification tasks are accomplished by following a prespecified protocol [<xref ref-type="bibr" rid="ref18">18</xref>]. In the face of high patient workloads, repetitive manual tasks could lead to inaccurate assessment and human fatigue. High workload is a recognized occupational stressor that has implications for the quality of care and patient outcomes [<xref ref-type="bibr" rid="ref19">19</xref>]. The automation of routine processes is a well-known solution to increase efficiency in daily workflows. Therefore, more advanced tools such as artificial intelligence (AI) can be integrated with digital adherence technologies to accelerate widespread adoption and impact [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>].</p>
      <p>AI applications have the potential to transform health care in several clinical practice areas, primarily medical imaging [<xref ref-type="bibr" rid="ref22">22</xref>]. First, AI tools can increase productivity and the efficiency of care delivery by streamlining workflows in the health care systems [<xref ref-type="bibr" rid="ref23">23</xref>]. Second, AI can help improve the experience of health care workers, enabling them to spend more time in direct patient care and reducing stress-related burnout [<xref ref-type="bibr" rid="ref19">19</xref>]. Third, AI can support the faster delivery of care, by enhancing clinical decision-making, helping health care systems manage population health more proactively, and allocating resources to where they can have the largest impact [<xref ref-type="bibr" rid="ref24">24</xref>]. Modern computer vision techniques powered by deep learning convolutional neural networks (DCNNs) can be applied to medical imaging, medical videos, and clinical deployment [<xref ref-type="bibr" rid="ref25">25</xref>]. Deep learning techniques that process raw data to perform classification or detection tasks can make digital adherence monitoring in TB control more effective and efficient. DCNNs are state-of-the-art machine learning algorithms that have the ability to learn from input data to recognize intricate activities and patterns [<xref ref-type="bibr" rid="ref26">26</xref>]. These characteristics make DCNNs powerful tools for recognition, classification, and prediction. Moreover, the features discovered by the models are not predetermined by human experts but rather by the patterns they learn from input data [<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref28">28</xref>]. This concept can be applied to patterns in the videos of medication intake. 
However, the development and implementation of deep learning methods in health care remain largely limited because of a lack of access to large, well-curated, and labeled data sets. Additionally, specific technical knowledge, skills, and expertise required to develop deep learning models are often uncommon among health care professionals [<xref ref-type="bibr" rid="ref27">27</xref>]. The goal of our pilot was to conduct a proof of concept for the development of an AI system that can perform routine classification tasks applicable to medication adherence. We expect that this initial step will be the basis for further development and validation of AI tools that will be used across treatments in chronic diseases in a variety of clinical settings.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Study Design, Population, and Data Sources</title>
        <p>In this pilot study, a multidisciplinary team consisting of a physician scientist with expertise in TB medication adherence; 2 computer scientists with expertise in machine learning, computer vision, and deep learning models; and 3 graduate students in computer science evaluated the technical feasibility of applying AI to analyze a raw data set of videos from patients with TB taking medications. We used a secondary data set of 861 self-recorded medication intake videos collected as part of a pilot VDOT study of 51 patients with TB. The pilot study was conducted in Uganda.</p>
      </sec>
      <sec>
        <title>Ethical Approval</title>
        <p>The study was approved by the Institutional Review Board Office of Research, University of Georgia (number PROJECT00002406) and the Makerere University Higher Degrees, Research and Ethics Committee in Uganda (number 756).</p>
      </sec>
      <sec>
        <title>Patient Recruitment and Enrollment</title>
        <p>A cohort of adult male and female patients aged 18-65 years with a confirmed diagnosis of TB attending public clinics in Kampala, Uganda, were enrolled in VDOT pilot studies from July 2018 to December 2020. The study evaluated the effectiveness of VDOT in monitoring adherence where daily medication intake videos were collected with the patients’ written consent. Further details on the eligibility criteria and sociodemographic characteristics of the patients contributing to the video data sets are published elsewhere [<xref ref-type="bibr" rid="ref16">16</xref>].</p>
      </sec>
      <sec>
        <title>Process of Annotation and Labeling of Medication Videos</title>
        <p>First, a team of 3 trained video annotators with a computer science background evaluated the videos in the primary medication intake data set to create a new medication intake video data set. Using a systematic iterative process of review and discussions, the research team developed a protocol for video annotation de novo, since no specific protocols existed for medication videos. The team included the 3 trained student annotators, a senior computer scientist, and a physician with expertise in medication adherence. The protocol was summarized into 3 basic rules that guided labeling videos as <italic>positive</italic>—actual medication ingestion activity, <italic>negative</italic>—no medication intake activities, or <italic>ambiguous</italic>—if no pills were seen but there was a blurry image of a face, as described in <xref ref-type="table" rid="table1">Table 1</xref>. We used the de novo standardized protocol for labeling videos. To control the quality of the annotation, we only considered videos where there was complete agreement of the classification across the 3 annotators to create the final video data set for model training and evaluation. After the annotation process, out of 861 videos, we kept 497 videos, which consisted of 405 (47%) positive videos and 92 (10%) negative videos. The sex and class distribution of videos that were kept in the final data set was as follows: of the 405 positive videos from 51 patients, 248 (61.2%) were from 28 male patients and 157 (38.7%) videos were from 23 female patients. Only 36 patients produced 92 negative videos; 48 (52%) were from 19 male patients, and 44 (48%) were from 17 female patients. The average distribution was 8 positive videos and 2 negative videos per patient. The outcome of this process resulted in the medication intake video data set that was used as a training data set for the deep learning model. 
Second, we divided the data set into training and validation subsets to assess the performance of our deep learning framework and baselines on medication adherence recognition. Furthermore, we analyzed the influence of different deep learning architectures in our framework on medication adherence recognition, classification, and prediction. It is important to note that the video annotation process is only required to construct the data set for model training and evaluation of this study. Once the deep learning model is trained, we do not need manual annotations anymore for the new videos, when using the proposed methods in practice.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>The rules for video annotation, labeling, and outcome of the video data set.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="320"/>
            <col width="500"/>
            <col width="180"/>
            <thead>
              <tr valign="top">
                <td>Labels</td>
                <td>Description</td>
                <td>Videos (N=861), n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Positive: actual medication ingestion activities=adherence</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Videos show clear visibility of the face, pill, and water bottle</p>
                    </list-item>
                    <list-item>
                      <p>Patient exhibits clear action of taking pills and drinking water</p>
                    </list-item>
                    <list-item>
                      <p>Good illumination</p>
                    </list-item>
                  </list>
                </td>
                <td>405 (47)</td>
              </tr>
              <tr valign="top">
                <td>Negative: no actual medication ingestion activities=nonadherence</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Face of patient seen</p>
                    </list-item>
                    <list-item>
                      <p>No pills are detected</p>
                    </list-item>
                    <list-item>
                      <p>Patient does not put the pills into his or her mouth or there is no action of drinking water</p>
                    </list-item>
                    <list-item>
                      <p>Good illumination</p>
                    </list-item>
                  </list>
                </td>
                <td>92 (10)</td>
              </tr>
              <tr valign="top">
                <td>Excluded videos</td>
                <td>—<sup>a</sup></td>
                <td>364 (42.3)</td>
              </tr>
              <tr valign="top">
                <td>Ambiguous or uncertain videos</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Pills not seen</p>
                    </list-item>
                    <list-item>
                      <p>Blurry faces and hands</p>
                    </list-item>
                  </list>
                </td>
                <td>157 (18.2)</td>
              </tr>
              <tr valign="top">
                <td>Poor quality videos</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Poor illumination</p>
                    </list-item>
                    <list-item>
                      <p>Face of patient not seen</p>
                    </list-item>
                  </list>
                </td>
                <td>152 (17.7)</td>
              </tr>
              <tr valign="top">
                <td>Damaged videos</td>
                <td>Not reviewed</td>
                <td>55 (6)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>Not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Preprocessing of the Annotated Medication Intake Videos</title>
        <p>Before we used AI tools to analyze the medication adherence of the patients, some techniques were implemented to preprocess the videos. The video-preprocessing stage was divided into 3 parts. In the first part, each video was converted to the mp4 format since the mp4 format is more convenient to process than the original format of the raw videos. Next, we adopted FFmpeg, a leading multimedia framework, to extract the video frames from each video with the mp4 format. Nevertheless, not all the video frames were relevant to medication adherence, and the number of the video frames for each video was quite different, which also posed a problem in our study. In the end, we manually extracted the same number of key video frames that were the most relevant to medication adherence. These video frames constituted the final data set for our AI experiments.</p>
      </sec>
      <sec>
        <title>Model Development: Deep Learning Framework</title>
        <p>Our deep learning framework for recognizing medication intake activities consisted of 2 parts: first, convolutional neural networks (CNNs) were used to extract visual features from medication intake videos; and second, support vector machine (SVM) [<xref ref-type="bibr" rid="ref29">29</xref>] was adopted as a classifier to generate prediction scores for videos as shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>. In particular, inspired by the huge success of deep learning models in image and video analysis, we used 2D CNN and 3D CNN models to extract the high-dimensional, spatiotemporal features from input videos. These models were pretrained on large-scale, labeled image or video data sets. Then, the SVM, an effective classifier, was trained to classify the extracted high-dimensional features. Our framework consisted of DCNNs pretrained with external data sets: Inception-v4 [<xref ref-type="bibr" rid="ref30">30</xref>]; 3D ResNet, designed for lower complexity structure with so-called skip residual connections [<xref ref-type="bibr" rid="ref31">31</xref>]; 3D ResNext [<xref ref-type="bibr" rid="ref32">32</xref>]; and Inflated 3D [<xref ref-type="bibr" rid="ref33">33</xref>]. These DCNNs are extensively used by the computer science community for extracting features from images and videos [<xref ref-type="bibr" rid="ref34">34</xref>]. Specifically, Inception-v4 is pretrained on the ImageNet data set [<xref ref-type="bibr" rid="ref35">35</xref>]. 3D ResNet, 3D ResNext, and Inflated 3D are pretrained on the Kinetics data set [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>]. Besides, the sizes of the feature vectors from each model are different. For instance, the length of the feature vector generated from Inception-v4 is 1536, whereas the length of the feature vector is 2048 from 3D ResNet and 3D ResNext. 
The details of the feature length are illustrated in <xref ref-type="table" rid="table2">Table 2</xref>. In the training stage, we trained the SVM with features extracted by the pretrained DCNNs from the training data set. In the testing stage, our trained model, which consists of a DCNN and SVM, generated prediction scores for videos from the testing data set to recognize medication adherence. The generated prediction score is a decimal number between 0 and 1, which can be interpreted as the probability that the video represents a patient correctly ingesting their medication.</p>
        <p>These DCNN models are designed primarily to extract features from images, but they cannot deal with videos directly, due to the 3D structure of video data. To tackle this problem, various 3D CNN models have been developed, in which the 2D convolution operation is extended to a 3D convolution operation. The 3D ResNet and 3D ResNext used in our study are built on the 2D CNN model ResNet [<xref ref-type="bibr" rid="ref31">31</xref>] that introduces the idea of residual connections. <xref rid="figure2" ref-type="fig">Figure 2</xref> illustrates the building blocks of the ResNet, 3D ResNet, and 3D ResNext. All 3 blocks consist of 3 convolution layers followed by batch normalization [<xref ref-type="bibr" rid="ref32">32</xref>], rectified linear unit [<xref ref-type="bibr" rid="ref33">33</xref>], and identity mapping [<xref ref-type="bibr" rid="ref31">31</xref>]. The major difference is that the 2D convolution kernels (1 × 1 and 3 × 3) in ResNet are modified to 3D convolution kernels (1 × 1 × 1 and 3 × 3 × 3) in 3D ResNet and 3D ResNext. Compared to 3D ResNet, 3D ResNext introduces the group convolutions in the second layer of the block, which divides the feature maps into small groups. In practice, 3D ResNet and 3D ResNext are typically composed of multiple layers [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>].</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Illustration of deep learning framework with feature extractor CNNs and classifier SVM. Different grey colors represent labeled videos, and black color denotes unlabeled videos. CNN: convolution neural network; SVM: support vector machine.</p>
          </caption>
          <graphic xlink:href="ai_v2i1e40167_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>The number of features extracted by each model.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="500"/>
            <col width="500"/>
            <thead>
              <tr valign="top">
                <td>Model</td>
                <td>Features, n</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>HOG<sup>a</sup></td>
                <td>16,740</td>
              </tr>
              <tr valign="top">
                <td>Inception-v4</td>
                <td>1536</td>
              </tr>
              <tr valign="top">
                <td>3D ResNet</td>
                <td>2048</td>
              </tr>
              <tr valign="top">
                <td>3D ResNext</td>
                <td>2048</td>
              </tr>
              <tr valign="top">
                <td>Inflated 3D</td>
                <td>1024</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>HOG: histogram of oriented gradient.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Illustration of the building block of (a) ResNet, (b) 3D ResNet, and (c) 3D ResNext. BN: batch normalization; conv: convolution; F: number of feature channels; ReLu: rectified linear unit.</p>
          </caption>
          <graphic xlink:href="ai_v2i1e40167_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>Apart from 3D ResNet and 3D ResNext, we also used Inception-v4 and Inflated 3D as our feature extractors. As a 2D CNN model, Inception-v4 is the fourth version of the Inception architecture network family. Compared to previous versions of the Inception family, Inception-v4 not only has a more uniformly simplified architecture and more inception modules but also absorbs the idea of residual connections from ResNet to form the new Inception block called residual inception blocks. Inflated 3D is another 3D CNN, which is built upon a 2D CNN from the Inception family. In our study, we compared the performance of one 2D CNN (Inception-v4) and three 3D CNNs (ie, 3D ResNet, 3D ResNext, and Inflated 3D). The 2D CNN treated each video as a set of video frames and generated a feature vector for each video frame, whereas 3D CNNs took video as a whole and generated a unified feature vector.</p>
        <p>To better illustrate the effectiveness of deep learning models for medication adherence recognition, we used a traditional visual feature descriptor, histogram of oriented gradient (HOG) [<xref ref-type="bibr" rid="ref38">38</xref>], as the replacement of the features extracted by DCNNs. HOG is a traditional descriptor that can generate handcrafted features directly from the images. The handcrafted feature was fed into the SVM for classification. In our pilot study, the SVM with HOG features was used as a baseline. Besides, we also investigated the average time of each method to extract features from the video frames, since efficiency is also an important indicator to evaluate the methods in practice.</p>
      </sec>
      <sec>
        <title>Statistical Analysis</title>
        <p>We adopted a 5-fold cross-validation strategy to evaluate the performance of our deep learning framework with different DCNNs as it is the recommended best practice for model validation [<xref ref-type="bibr" rid="ref39">39</xref>]. We chose 5-fold cross-validation since it offers a good trade-off between efficiency and reliability, compared with alternative strategies such as leave-one-out cross-validation or random splits. In the experiments, we evaluated the performance of our framework from different aspects by using 5 metrics: the area under the receiver operating characteristic (ROC) curve (AUC) and <italic>F</italic><sub>1</sub>-score, which are primary evaluation metrics, and sensitivity (recall), specificity, and precision (positive predictive value), which are supplementary. The <italic>F</italic><sub>1</sub>-score can be interpreted as the harmonic mean of precision and recall. We empirically set the threshold to 0.6 to neutralize the adverse effect of the imbalanced distribution of the data. For each given DCNN in our framework, we randomly split the data set into 5 subsets: 4 out of 5 subsets were used as the training data set, and the rest were adopted as the testing data set. We ran the 5-fold cross-validation 5 times. Each time, we randomly shuffled the order of the data before feeding the data into the model and reporting the mean values and SDs for each metric. Furthermore, another comparison experiment was implemented to show that our framework does not suffer from an overfitting problem with the high-dimensional features. Besides, we also drew the ROC curves to demonstrate the performance of different CNNs. We also evaluated the efficiency using speed in seconds as a metric defining the time required to extract features from the videos relevant to medication adherence. In addition, we noticed that metrics such as precision still have some limitations in the presence of class imbalance. 
This problem can be mitigated by adjusting the classification threshold.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Performance in the Monitoring of Medication Adherence</title>
        <p>3D ResNet achieved the best performance in the task of monitoring patient medication adherence activities as shown in <xref ref-type="table" rid="table3">Table 3</xref>. The performance of 3D ResNext was very close to that of 3D ResNet since they both have a similar structure. Besides, the results also reveal that 3D CNN models had better performance than the 2D CNN model and the traditional feature descriptor method. Specifically, the HOG method obtained the lowest values on all metrics. It is noted that 3D ResNet, 3D ResNext, and Inflated 3D are specifically designed for video feature extraction, whereas Inception-v4 is designed for image feature extraction. Overall, the performances of the 3D ResNet and 3D ResNext were very comparable in all the metrics. The 3D ResNet obtained the best results on the AUC, highlighting its advantage in the prediction of the medication adherence activity.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Performance of the proposed deep learning framework under different convolution neural networks and histogram of oriented gradient (HOG).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="150"/>
            <col width="170"/>
            <col width="170"/>
            <col width="170"/>
            <col width="180"/>
            <col width="160"/>
            <thead>
              <tr valign="top">
                <td>Feature extractor</td>
                <td>Sensitivity, mean (SD)</td>
                <td>Specificity, mean (SD)</td>
                <td>Precision, mean (SD)</td>
                <td><italic>F</italic><sub>1</sub>-score, mean (SD)</td>
                <td>AUC<sup>a</sup>, mean (SD)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>HOG</td>
                <td>90.77 (2.62)</td>
                <td>27.35 (8.98)</td>
                <td>85.03 (1.86)</td>
                <td>87.77 (1.41)</td>
                <td>0.65 (0.06)</td>
              </tr>
              <tr valign="top">
                <td>Inception-v4</td>
                <td>92.54 (3.53)</td>
                <td>43.70 (8.64)</td>
                <td>87.91 (1.95)</td>
                <td>90.12 (1.90)</td>
                <td>0.80 (0.05)</td>
              </tr>
              <tr valign="top">
                <td>3D ResNet</td>
                <td><italic>94.57</italic><sup>b</sup> (2.61)</td>
                <td><italic>54.57</italic> (<italic>6.46</italic>)</td>
                <td><italic>90.20</italic> (<italic>1.81</italic>)</td>
                <td><italic>92.30</italic> (1.44)</td>
                <td><italic>0.87</italic> (<italic>0.04</italic>)</td>
              </tr>
              <tr valign="top">
                <td>3D ResNext</td>
                <td>94.17 (2.67)</td>
                <td>51.74 (7.33)</td>
                <td>89.62 (2.21)</td>
                <td>91.81 (1.82)</td>
                <td>0.85 (0.05)</td>
              </tr>
              <tr valign="top">
                <td>Inflated 3D</td>
                <td>92.94 (<italic>3.47</italic>)</td>
                <td>49.78 (8.00)</td>
                <td>89.08 (1.85)</td>
                <td>90.94 (<italic>2.24</italic>)</td>
                <td>0.82 (0.06)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>AUC: area under the curve.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>Italicized numbers represent the best result under each metric.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Assessing Overfitting of the Model</title>
        <p>AI models usually suffer from the overfitting problem with high-dimensional features and a limited amount of training data. To further investigate whether high-dimensional features would cause the overfitting problem or not, we conducted additional experiments to give a better illustration. In this experiment, we used the pretrained 3D ResNet as the feature extractor and reduced the original feature dimension from 2048 to 256 with the principal component analysis method. The results are shown in <xref ref-type="table" rid="table4">Table 4</xref>. We observed that both dimensions achieved similar performance, which confirmed that our framework was not affected much by the overfitting problem.</p>
        <p>The ROC curves in <xref rid="figure3" ref-type="fig">Figure 3</xref> were generated by plotting the true positive rate (sensitivity) against the false positive rate (1 − specificity) at different threshold settings. The diagonal straight dashed line from (0,0) to (1,1) represents the performance of the random classifier. Ideally, all the ROC curves should lie above the straight dashed line. The further the curve deviates from the diagonal line, the better the classifier is. The curves in <xref rid="figure3" ref-type="fig">Figure 3</xref> can be divided into 3 groups. The first group, representing 3D ResNet and 3D ResNext, shows that the 2 curves were the closest to the y-axis with the highest AUC. The second group consists of Inception-v4 and Inflated 3D, with AUCs of 0.78 and 0.80. The worst performing classifier was the traditional model HOG, which is very close to the diagonal line, and its AUC is only 0.60.</p>
        <p>We also investigated the time efficiency of each method in our study and the results are illustrated in <xref ref-type="table" rid="table5">Table 5</xref>. The machine that ran the code consisted of 2 Intel E4208 CPUs and 1 P100 Tesla GPU. We evaluated the average time spent per video by each method to generate the relevant features. 3D ResNet was the fastest and took only 0.54 seconds to generate the features for each video, whereas HOG was the slowest, spending on average 4.53 seconds—8 times longer to generate the handcrafted features from a single video, signifying its inferiority in efficiency. The speeds of 3D ResNext and Inflated 3D were relatively comparable, whereas Inception-v4 was slower than the other DCNNs. Overall, considering both the model’s accuracy and efficiency, 3D ResNet might be the better model because it has both high accuracy and efficiency of processing videos.</p>
        <p>The class imbalance between positive and negative videos was pronounced in our data at a ratio of 405:92, respectively. To remedy the potential detrimental effect of the class imbalance in our data, we used a simple but effective method of adjusting the classification threshold [<xref ref-type="bibr" rid="ref40">40</xref>]. We conducted experiments to illustrate how different threshold values affected the performance of our model. In the experiment, we used 3D ResNet as the feature extractor and chose 3 threshold values: 0.5, 0.6, and 0.7. Five-fold cross-validation with fixed splits was adopted as shown in <xref ref-type="table" rid="table6">Table 6</xref>. We see that higher threshold values would lead to higher specificity and precision values but slightly lower sensitivity and <italic>F</italic><sub>1</sub>-score values. Adjusting the classification threshold helped to balance the sensitivity and specificity.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Performance of the proposed deep learning framework with different dimensions of features. 3D ResNet was adopted as the feature extractor.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="270"/>
            <col width="140"/>
            <col width="140"/>
            <col width="140"/>
            <col width="140"/>
            <col width="170"/>
            <thead>
              <tr valign="top">
                <td>Number of dimensions</td>
                <td>Sensitivity</td>
                <td>Specificity</td>
                <td>Precision</td>
                <td><italic>F</italic><sub>1</sub>-score</td>
                <td>AUC<sup>a</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>256</td>
                <td>93.09</td>
                <td>51.09</td>
                <td>89.39</td>
                <td>91.12</td>
                <td>0.83</td>
              </tr>
              <tr valign="top">
                <td>2048</td>
                <td>94.57</td>
                <td>54.35</td>
                <td>90.17</td>
                <td>92.26</td>
                <td>0.86</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>AUC: area under the curve.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Receiver operating characteristic curves for monitoring the medication adherence from models in our framework. AUC: area under the curve; HOG: histogram of oriented gradient.</p>
          </caption>
          <graphic xlink:href="ai_v2i1e40167_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>The average time spent per video by each model.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="500"/>
            <col width="500"/>
            <thead>
              <tr valign="top">
                <td>Method</td>
                <td>Time (seconds)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>HOG<sup>a</sup></td>
                <td>4.53</td>
              </tr>
              <tr valign="top">
                <td>Inception-v4</td>
                <td>2.38</td>
              </tr>
              <tr valign="top">
                <td>Inflated 3D</td>
                <td>0.98</td>
              </tr>
              <tr valign="top">
                <td>3D ResNext</td>
                <td>0.6</td>
              </tr>
              <tr valign="top">
                <td>3D ResNet</td>
                <td>0.54</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>HOG: histogram of oriented gradient.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table6">
          <label>Table 6</label>
          <caption>
            <p>Performance of the proposed deep learning framework with different classification thresholds. 3D ResNet was adopted as the feature extractor.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <thead>
              <tr valign="top">
                <td>Threshold</td>
                <td>Sensitivity</td>
                <td>Specificity</td>
                <td>Precision</td>
                <td><italic>F</italic><sub>1</sub>-score</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>0.5</td>
                <td>96.79</td>
                <td>43.48</td>
                <td>88.34</td>
                <td>92.34</td>
              </tr>
              <tr valign="top">
                <td>0.6</td>
                <td>94.57</td>
                <td>54.35</td>
                <td>90.17</td>
                <td>92.26</td>
              </tr>
              <tr valign="top">
                <td>0.7</td>
                <td>88.64</td>
                <td>67.39</td>
                <td>92.31</td>
                <td>90.37</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Finding</title>
        <p>In this pilot project, we demonstrated a reasonable proof of concept that deep learning and AI techniques could be applied to advance support medication adherence monitoring. We tested 4 deep learning models and found that 3D ResNet performed best at an AUC of 0.84 and a speed of 0.54 seconds per video review. The level of discriminatory accuracy obtained is comparable to other machine learning algorithms that have been shown to achieve a diagnostic accuracy ranging from 72.5% to 77.3% in clinical settings. This level is similar to or higher than the expert clinical accuracy of doctors [<xref ref-type="bibr" rid="ref41">41</xref>]. Spatiotemporal models for action classification used in nonmedical fields have shown even better performance with an average accuracy of 90% [<xref ref-type="bibr" rid="ref42">42</xref>]. A systematic review and meta-analysis of 69 studies comparing deep learning models against health care professionals concluded that both approaches were equivalent in diagnostic accuracy [<xref ref-type="bibr" rid="ref43">43</xref>]. To our knowledge, this is the first pilot study to evaluate deep learning models for specific application to digital technologies and medication adherence in Africa.</p>
        <p>Our model results could be limited by the relatively pronounced class imbalance between positive and negative samples in the data. To address the class imbalance problem, we adjusted the classification thresholds for the 3D ResNet model to better balance the sensitivity and specificity. Specifically, we varied the thresholds at 0.5, 0.6, and 0.7 and found that across the range, sensitivity decreased slightly by 8% whereas specificity increased by 55%, thus improving the performance of the model. This means that by adjusting the classification threshold to 0.7, the model’s ability to correctly identify persons who are not taking medications could be achieved. The relatively high performance of the deep learning models signifies the power of AI tools that can be harnessed for medication monitoring in routine clinical care or drug efficacy trials. We also acknowledge that our current experimental settings may lead to issues such as overfitting and data leakage, which are possible limitations to our findings. This could be due to the high dimensionality of features extracted by deep learning models and the small set of patients used in our study. In addition, the stratification is performed at the video level, and thus, it is possible that the videos from the same patient may appear in both training and test phases during cross-validation. Ideally, there is need to perform evaluations with stratification at the patient level; this step will be a priority in our future work. This pilot study is a valuable initial step for building more robust models that have relevant applications suitable for the local African context where the medication intake videos were collected. In the era of COVID-19 pandemic, the use of synchronous telehealth visits proved to be an extremely valuable care delivery approach when in-person provider-patient interactions were not possible [<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref45">45</xref>]. 
Our proof-of-concept study explores the use of AI to bolster the utility of asynchronous remote provider-provider interactions. The evolving capacity of digital technologies to store and analyze various types of data will continue to revolutionize health care delivery in both resource-limited and resource-rich countries.</p>
        <p>There are some strengths of this pilot study. For example, this is the first study that attempted to build and evaluate deep learning models using video images of TB medication intake from Uganda and the rest of Africa. We also developed a preliminary protocol for the annotation of medication video that can be refined further for use in low-income countries. This protocol was generated through a systematic iterative process of reviewing, discussing, and refining among a team of 3 trained video annotators who were computer science graduate students supervised by an expert in the field. Our pilot work builds on the existing literature and aspiration to expand the use of AI in routine health care [<xref ref-type="bibr" rid="ref43">43</xref>] and, specifically, medication adherence monitoring [<xref ref-type="bibr" rid="ref3">3</xref>]. By examining the utility of AI-based models, we are taking steps toward accelerating the future scale-up of digital adherence technologies in remote medication monitoring in TB, HIV/AIDS, and other chronic health conditions. The study was limited to the evaluation of the technical feasibility of developing a deep learning model. We did not incorporate all the recommended methodological features for the clinical validation of AI performance in real-world practice [<xref ref-type="bibr" rid="ref46">46</xref>]. Indeed, we acknowledge that comprehensive validation is a critical next step for this work.</p>
        <p>We also plan to develop new methods and evaluation protocols for the class-imbalanced settings in our future work.</p>
        <p>It is worth noting that the same patient had multiple videos, which may introduce dependencies between images of the same patient and make the cross-validation less trustworthy. However, we clearly observed that the videos from the same patient had substantial differences in visual appearance. For example, some videos were recorded indoors whereas others were recorded outdoors, the same patient wore different clothes in different videos, and the viewpoints of video recording were also different. Furthermore, our method aimed to detect and understand the human medication adherence activities under a series of video frames. For instance, our model had to focus on specific key actions, for example, putting the pills into the mouth and drinking water, while trying to ignore the influence of the environment in the video frames. Although we used the video level to conduct the 5-fold cross-validation, the variance of the environment for videos from the same patient could present a challenge for our model to identify whether the patient has taken the pill or not.</p>
      </sec>
      <sec>
        <title>Future Implications and Recommendations</title>
        <p>Future work should be focused on improving the classification accuracy of deep learning models in medication adherence. First, there is a need for open-sourcing of large, labeled data sets with which to train the algorithms, especially in the African context. Second, additional techniques are needed to address class imbalance to improve the classification performance of deep learning models. Lastly, we propose to apply self-supervised learning methods, which provide a new way to pretrain DCNNs by exploiting pseudo-training labels that eliminates the time-consuming tasks of manual annotation. In our current deep learning framework, models are pretrained with external data sets, which may not be suitable for the extraction of visual features to classify medication adherence and nonadherence activities. All the neural network models showed comparable discriminative performance and diagnostic properties to state-of-the-art–performing deep learning algorithms. The findings serve as a reasonable proof of concept to support the potential utility of deep learning models in the binary classification of medication video frames to predict adherence. The success and widespread use of AI technologies will depend on data storage capacity, processing power, and other infrastructure capacities within health care systems [<xref ref-type="bibr" rid="ref3">3</xref>]. Research is needed to evaluate the effectiveness of AI solutions in different patient groups and establish the barriers to widespread adoption of digital health technologies.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>Our findings in this pilot study show the potential application of pretrained deep learning models and AI for the classification of medication adherence based on a unique video data set drawn in the African setting. The 3D ResNet model showed the best performance in relation to speed and discriminatory performance. Further development of AI tools to improve the monitoring of medication adherence could advance this field in public health, especially in low-resource settings.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AI</term>
          <def>
            <p>artificial intelligence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">AUC</term>
          <def>
            <p>area under the curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CNN</term>
          <def>
            <p>convolutional neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">DCNN</term>
          <def>
            <p>deep learning convolutional neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">HOG</term>
          <def>
            <p>histogram of oriented gradient</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">ROC</term>
          <def>
            <p>receiver operating characteristic</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">SVM</term>
          <def>
            <p>support vector machine</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">TB</term>
          <def>
            <p>tuberculosis</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">VDOT</term>
          <def>
            <p>video-based directly observed therapy</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>We would like to thank Dr Esther Buregyeya, Dr Sarah Zalwango, and the field research team members in Uganda—Damalie Nakkonde, Gloria Nassanga, Daphine Kyaine, and Michelle Geno—for their assistance in collecting the video data for the research.</p>
      <p>This work was supported by the National Center for Advancing Translational Sciences of the National Institutes of Health under award number UL1TR002378. The video data were collected with funding support from the National Institutes of Health Fogarty International Center under award number R21 TW011365. The funders had no role in the design, analysis, and interpretation of the study results. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health.</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>JNS, WS, RZ, and SL researched literature and conceived the study. JNS was involved in seeking ethical approval and patient recruitment. JNS, WS, RZ, EM, SL, and PEK were involved in protocol development and data analysis. JSN and SL wrote the first draft of the manuscript. All authors reviewed and edited the manuscript and approved the final version of the manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="web">
          <article-title>Global tuberculosis report 2022</article-title>
          <source>World Health Organization</source>
          <year>2022</year>
          <month>10</month>
          <day>27</day>
          <access-date>2023-02-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.who.int/publications/i/item/9789240061729">https://www.who.int/publications/i/item/9789240061729</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>World Health Organization</collab>
            <collab>European Respiratory Society</collab>
          </person-group>
          <article-title>Digital health for the end TB strategy: an agenda for action</article-title>
          <source>World Health Organization</source>
          <year>2015</year>
          <access-date>2023-02-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://apps.who.int/iris/handle/10665/205222">https://apps.who.int/iris/handle/10665/205222</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Babel</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Taneja</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Mondello Malvestiti</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Monaco</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Donde</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Artificial intelligence solutions to increase medication adherence in patients with non-communicable diseases</article-title>
          <source>Front Digit Health</source>
          <year>2021</year>
          <month>6</month>
          <day>29</day>
          <volume>3</volume>
          <fpage>669869</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34713142"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fdgth.2021.669869</pub-id>
          <pub-id pub-id-type="medline">34713142</pub-id>
          <pub-id pub-id-type="pmcid">PMC8521858</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Anuwatnonthakate</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Limsomboon</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Nateniyom</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wattanaamornkiat</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Komsakorn</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Moolphate</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chiengsorn</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Kaewsa-Ard</surname>
              <given-names>Samroui</given-names>
            </name>
            <name name-style="western">
              <surname>Sombat</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Siangphoe</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Mock</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Varma</surname>
              <given-names>JK</given-names>
            </name>
          </person-group>
          <article-title>Directly observed therapy and improved tuberculosis treatment outcomes in Thailand</article-title>
          <source>PLoS One</source>
          <year>2008</year>
          <month>08</month>
          <day>28</day>
          <volume>3</volume>
          <issue>8</issue>
          <fpage>e3089</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0003089"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0003089</pub-id>
          <pub-id pub-id-type="medline">18769479</pub-id>
          <pub-id pub-id-type="pmcid">PMC2518105</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alipanah</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Jarlsberg</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Linh</surname>
              <given-names>NN</given-names>
            </name>
            <name name-style="western">
              <surname>Falzon</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Jaramillo</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Nahid</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Adherence interventions and outcomes of tuberculosis treatment: a systematic review and meta-analysis of trials and observational studies</article-title>
          <source>PLoS Med</source>
          <year>2018</year>
          <month>07</month>
          <day>3</day>
          <volume>15</volume>
          <issue>7</issue>
          <fpage>e1002595</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pmed.1002595"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pmed.1002595</pub-id>
          <pub-id pub-id-type="medline">29969463</pub-id>
          <pub-id pub-id-type="pii">PMEDICINE-D-17-04267</pub-id>
          <pub-id pub-id-type="pmcid">PMC6029765</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Waitt</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Squire</surname>
              <given-names>SB</given-names>
            </name>
          </person-group>
          <article-title>A systematic review of risk factors for death in adults during and after tuberculosis treatment</article-title>
          <source>Int J Tuberc Lung Dis</source>
          <year>2011</year>
          <month>07</month>
          <day>01</day>
          <volume>15</volume>
          <issue>7</issue>
          <fpage>871</fpage>
          <lpage>85</lpage>
          <pub-id pub-id-type="doi">10.5588/ijtld.10.0352</pub-id>
          <pub-id pub-id-type="medline">21496360</pub-id>
          <pub-id pub-id-type="pii">100352</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Adane</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Alene</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Koye</surname>
              <given-names>DN</given-names>
            </name>
            <name name-style="western">
              <surname>Zeleke</surname>
              <given-names>BM</given-names>
            </name>
          </person-group>
          <article-title>Non-adherence to anti-tuberculosis treatment and determinant factors among patients with tuberculosis in northwest Ethiopia</article-title>
          <source>PLoS One</source>
          <year>2013</year>
          <month>11</month>
          <day>11</day>
          <volume>8</volume>
          <issue>11</issue>
          <fpage>e78791</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0078791"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0078791</pub-id>
          <pub-id pub-id-type="medline">24244364</pub-id>
          <pub-id pub-id-type="pii">PONE-D-13-31718</pub-id>
          <pub-id pub-id-type="pmcid">PMC3823971</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bulage</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Sekandi</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kigenyi</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Mupere</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>The quality of tuberculosis services in health care centres in a rural district in Uganda: the providers' and clients' perspective</article-title>
          <source>Tuberc Res Treat</source>
          <year>2014</year>
          <month>9</month>
          <day>7</day>
          <volume>2014</volume>
          <fpage>685982</fpage>
          <lpage>11</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1155/2014/685982"/>
          </comment>
          <pub-id pub-id-type="doi">10.1155/2014/685982</pub-id>
          <pub-id pub-id-type="medline">25276424</pub-id>
          <pub-id pub-id-type="pmcid">PMC4170836</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>WHO Global Observatory for eHealth</collab>
          </person-group>
          <article-title>mHealth: new horizons for health through mobile technologies: second global survey on eHealth</article-title>
          <source>World Health Organization</source>
          <year>2011</year>
          <access-date>2023-02-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://apps.who.int/iris/handle/10665/44607">https://apps.who.int/iris/handle/10665/44607</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="web">
          <article-title>Guidelines for treatment of drug-susceptible tuberculosis and patient care (2017 update)</article-title>
          <source>World Health Organization</source>
          <year>2017</year>
          <access-date>2023-02-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://apps.who.int/iris/bitstream/handle/10665/255052/9789241550000-eng.pdf">https://apps.who.int/iris/bitstream/handle/10665/255052/9789241550000-eng.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Garfein</surname>
              <given-names>RS</given-names>
            </name>
            <name name-style="western">
              <surname>Doshi</surname>
              <given-names>RP</given-names>
            </name>
          </person-group>
          <article-title>Synchronous and asynchronous video observed therapy (VOT) for tuberculosis treatment adherence monitoring and support</article-title>
          <source>J Clin Tuberc Other Mycobact Dis</source>
          <year>2019</year>
          <month>12</month>
          <volume>17</volume>
          <fpage>100098</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2405-5794(18)30038-X"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jctube.2019.100098</pub-id>
          <pub-id pub-id-type="medline">31867442</pub-id>
          <pub-id pub-id-type="pii">S2405-5794(18)30038-X</pub-id>
          <pub-id pub-id-type="pmcid">PMC6904830</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Story</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Aldridge</surname>
              <given-names>RW</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Garber</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Hall</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ferenando</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Possas</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hemming</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wurie</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Luchenski</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Abubakar</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>McHugh</surname>
              <given-names>TD</given-names>
            </name>
            <name name-style="western">
              <surname>White</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Watson</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Lipman</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Garfein</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Hayward</surname>
              <given-names>AC</given-names>
            </name>
          </person-group>
          <article-title>Smartphone-enabled video-observed versus directly observed treatment for tuberculosis: a multicentre, analyst-blinded, randomised, controlled superiority trial</article-title>
          <source>Lancet</source>
          <year>2019</year>
          <month>03</month>
          <day>23</day>
          <volume>393</volume>
          <issue>10177</issue>
          <fpage>1216</fpage>
          <lpage>1224</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0140-6736(18)32993-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S0140-6736(18)32993-3</pub-id>
          <pub-id pub-id-type="medline">30799062</pub-id>
          <pub-id pub-id-type="pii">S0140-6736(18)32993-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC6429626</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Story</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Garfein</surname>
              <given-names>RS</given-names>
            </name>
            <name name-style="western">
              <surname>Hayward</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rusovich</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Dadu</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Soltan</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Oprunenco</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Collins</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Sarin</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Quraishi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sharma</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Migliori</surname>
              <given-names>GB</given-names>
            </name>
            <name name-style="western">
              <surname>Varadarajan</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Falzon</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Monitoring therapy compliance of tuberculosis patients by using video-enabled electronic devices</article-title>
          <source>Emerg Infect Dis</source>
          <year>2016</year>
          <month>03</month>
          <volume>22</volume>
          <issue>3</issue>
          <fpage>538</fpage>
          <lpage>40</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.3201/eid2203.151620"/>
          </comment>
          <pub-id pub-id-type="doi">10.3201/eid2203.151620</pub-id>
          <pub-id pub-id-type="medline">26891363</pub-id>
          <pub-id pub-id-type="pmcid">PMC4766903</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Garfein</surname>
              <given-names>RS</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Cuevas-Mota</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Collins</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Muñoz</surname>
              <given-names>Fatima</given-names>
            </name>
            <name name-style="western">
              <surname>Catanzaro</surname>
              <given-names>DG</given-names>
            </name>
            <name name-style="western">
              <surname>Moser</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Higashi</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Al-Samarrai</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Kriner</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Vaishampayan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cepeda</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bulterys</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>NK</given-names>
            </name>
            <name name-style="western">
              <surname>Rios</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Raab</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Tuberculosis treatment monitoring by video directly observed therapy in 5 health districts, California, USA</article-title>
          <source>Emerg Infect Dis</source>
          <year>2018</year>
          <month>10</month>
          <volume>24</volume>
          <issue>10</issue>
          <fpage>1806</fpage>
          <lpage>1815</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.3201/eid2410.180459"/>
          </comment>
          <pub-id pub-id-type="doi">10.3201/eid2410.180459</pub-id>
          <pub-id pub-id-type="medline">30226154</pub-id>
          <pub-id pub-id-type="pmcid">PMC6154139</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sinkou</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Hurevich</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Rusovich</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Zhylevich</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Falzon</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>de Colombani</surname>
              <given-names>Pierpaolo</given-names>
            </name>
            <name name-style="western">
              <surname>Dadu</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Dara</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Story</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Skrahina</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Video-observed treatment for tuberculosis patients in Belarus: findings from the first programmatic experience</article-title>
          <source>Eur Respir J</source>
          <year>2017</year>
          <month>03</month>
          <day>22</day>
          <volume>49</volume>
          <issue>3</issue>
          <fpage>1602049</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://erj.ersjournals.com/lookup/pmidlookup?view=long&amp;pmid=28331042"/>
          </comment>
          <pub-id pub-id-type="doi">10.1183/13993003.02049-2016</pub-id>
          <pub-id pub-id-type="medline">28331042</pub-id>
          <pub-id pub-id-type="pii">49/3/1602049</pub-id>
          <pub-id pub-id-type="pmcid">PMC5380873</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sekandi</surname>
              <given-names>JN</given-names>
            </name>
            <name name-style="western">
              <surname>Buregyeya</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Zalwango</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Dobbin</surname>
              <given-names>KK</given-names>
            </name>
            <name name-style="western">
              <surname>Atuyambe</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Nakkonde</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Turinawe</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tucker</surname>
              <given-names>EG</given-names>
            </name>
            <name name-style="western">
              <surname>Olowookere</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Turyahabwe</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Garfein</surname>
              <given-names>RS</given-names>
            </name>
          </person-group>
          <article-title>Video directly observed therapy for supporting and monitoring adherence to tuberculosis treatment in Uganda: a pilot cohort study</article-title>
          <source>ERJ Open Res</source>
          <year>2020</year>
          <month>01</month>
          <day>06</day>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>00175-2019</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32280670"/>
          </comment>
          <pub-id pub-id-type="doi">10.1183/23120541.00175-2019</pub-id>
          <pub-id pub-id-type="medline">32280670</pub-id>
          <pub-id pub-id-type="pii">00175-2019</pub-id>
          <pub-id pub-id-type="pmcid">PMC7132038</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Garfein</surname>
              <given-names>RS</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Cuevas-Mota</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Collins</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Catanzaro</surname>
              <given-names>DG</given-names>
            </name>
            <name name-style="western">
              <surname>Muñoz</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Moser</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Chuck</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Higashi</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bulterys</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Raab</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Rios</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Evaluation of recorded video-observed therapy for anti-tuberculosis treatment</article-title>
          <source>Int J Tuberc Lung Dis</source>
          <year>2020</year>
          <month>05</month>
          <day>01</day>
          <volume>24</volume>
          <issue>5</issue>
          <fpage>520</fpage>
          <lpage>525</lpage>
          <pub-id pub-id-type="doi">10.5588/ijtld.19.0456</pub-id>
          <pub-id pub-id-type="medline">32398202</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>National Center for HIV/AIDS, Viral Hepatitis, STD, and TB Prevention</collab>
            <collab>Division of Tuberculosis Elimination</collab>
          </person-group>
          <article-title>Implementing an electronic directly observed therapy (eDOT) program: a toolkit for tuberculosis programs</article-title>
          <source>Centers for Disease Control and Prevention</source>
          <year>2015</year>
          <access-date>2023-02-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/tb/publications/pdf/tbedottoolkit.pdf">https://www.cdc.gov/tb/publications/pdf/tbedottoolkit.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Erickson</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Rockwern</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Koltov</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>McLean</surname>
              <given-names>RM</given-names>
            </name>
            <collab>Medical Practice and Quality Committee of the American College of Physicians</collab>
          </person-group>
          <article-title>Putting patients first by reducing administrative tasks in health care: a position paper of the American College of Physicians</article-title>
          <source>Ann Intern Med</source>
          <year>2017</year>
          <month>05</month>
          <day>02</day>
          <volume>166</volume>
          <issue>9</issue>
          <fpage>659</fpage>
          <lpage>661</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.acpjournals.org/doi/abs/10.7326/M16-2697?url_ver=Z39.88-2003&amp;rfr_id=ori:rid:crossref.org&amp;rfr_dat=cr_pub++0pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.7326/M16-2697</pub-id>
          <pub-id pub-id-type="medline">28346948</pub-id>
          <pub-id pub-id-type="pii">2614079</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Doshi</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Falzon</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Thomas</surname>
              <given-names>BV</given-names>
            </name>
            <name name-style="western">
              <surname>Temesgen</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Sadasivan</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Migliori</surname>
              <given-names>GB</given-names>
            </name>
            <name name-style="western">
              <surname>Raviglione</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Tuberculosis control, and the where and why of artificial intelligence</article-title>
          <source>ERJ Open Res</source>
          <year>2017</year>
          <month>04</month>
          <day>21</day>
          <volume>3</volume>
          <issue>2</issue>
          <fpage>00056-2017</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/28656130"/>
          </comment>
          <pub-id pub-id-type="doi">10.1183/23120541.00056-2017</pub-id>
          <pub-id pub-id-type="medline">28656130</pub-id>
          <pub-id pub-id-type="pii">00056-2017</pub-id>
          <pub-id pub-id-type="pmcid">PMC5478795</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Falzon</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Timimi</surname>
              <given-names>Hazim</given-names>
            </name>
            <name name-style="western">
              <surname>Kurosinski</surname>
              <given-names>Pascal</given-names>
            </name>
            <name name-style="western">
              <surname>Migliori</surname>
              <given-names>Giovanni Battista</given-names>
            </name>
            <name name-style="western">
              <surname>Van Gemert</surname>
              <given-names>Wayne</given-names>
            </name>
            <name name-style="western">
              <surname>Denkinger</surname>
              <given-names>Claudia</given-names>
            </name>
            <name name-style="western">
              <surname>Isaacs</surname>
              <given-names>Chris</given-names>
            </name>
            <name name-style="western">
              <surname>Story</surname>
              <given-names>Alistair</given-names>
            </name>
            <name name-style="western">
              <surname>Garfein</surname>
              <given-names>Richard S</given-names>
            </name>
            <name name-style="western">
              <surname>do Valle Bastos</surname>
              <given-names>Luis Gustavo</given-names>
            </name>
            <name name-style="western">
              <surname>Yassin</surname>
              <given-names>Mohammed A</given-names>
            </name>
            <name name-style="western">
              <surname>Rusovich</surname>
              <given-names>Valiantsin</given-names>
            </name>
            <name name-style="western">
              <surname>Skrahina</surname>
              <given-names>Alena</given-names>
            </name>
            <name name-style="western">
              <surname>Van Hoi</surname>
              <given-names>Le</given-names>
            </name>
            <name name-style="western">
              <surname>Broger</surname>
              <given-names>Tobias</given-names>
            </name>
            <name name-style="western">
              <surname>Abubakar</surname>
              <given-names>Ibrahim</given-names>
            </name>
            <name name-style="western">
              <surname>Hayward</surname>
              <given-names>Andrew</given-names>
            </name>
            <name name-style="western">
              <surname>Thomas</surname>
              <given-names>Bruce V</given-names>
            </name>
            <name name-style="western">
              <surname>Temesgen</surname>
              <given-names>Zelalem</given-names>
            </name>
            <name name-style="western">
              <surname>Quraishi</surname>
              <given-names>Subhi</given-names>
            </name>
            <name name-style="western">
              <surname>von Delft</surname>
              <given-names>Dalene</given-names>
            </name>
            <name name-style="western">
              <surname>Jaramillo</surname>
              <given-names>Ernesto</given-names>
            </name>
            <name name-style="western">
              <surname>Weyer</surname>
              <given-names>Karin</given-names>
            </name>
            <name name-style="western">
              <surname>Raviglione</surname>
              <given-names>Mario C</given-names>
            </name>
          </person-group>
          <article-title>Digital health for the End TB Strategy: developing priority products and making them work</article-title>
          <source>Eur Respir J</source>
          <year>2016</year>
          <month>07</month>
          <volume>48</volume>
          <issue>1</issue>
          <fpage>29</fpage>
          <lpage>45</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://air.unimi.it/handle/2434/627461"/>
          </comment>
          <pub-id pub-id-type="doi">10.1183/13993003.00424-2016</pub-id>
          <pub-id pub-id-type="medline">27230443</pub-id>
          <pub-id pub-id-type="pii">13993003.00424-2016</pub-id>
          <pub-id pub-id-type="pmcid">PMC4929075</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Davenport</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Kalakota</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>The potential for artificial intelligence in healthcare</article-title>
          <source>Future Healthc J</source>
          <year>2019</year>
          <month>06</month>
          <day>13</day>
          <volume>6</volume>
          <issue>2</issue>
          <fpage>94</fpage>
          <lpage>98</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/31363513"/>
          </comment>
          <pub-id pub-id-type="doi">10.7861/futurehosp.6-2-94</pub-id>
          <pub-id pub-id-type="medline">31363513</pub-id>
          <pub-id pub-id-type="pii">futurehealth</pub-id>
          <pub-id pub-id-type="pmcid">PMC6616181</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hazarika</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Artificial intelligence: opportunities and implications for the health workforce</article-title>
          <source>Int Health</source>
          <year>2020</year>
          <month>07</month>
          <day>01</day>
          <volume>12</volume>
          <issue>4</issue>
          <fpage>241</fpage>
          <lpage>245</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32300794"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/inthealth/ihaa007</pub-id>
          <pub-id pub-id-type="medline">32300794</pub-id>
          <pub-id pub-id-type="pii">5821036</pub-id>
          <pub-id pub-id-type="pmcid">PMC7322190</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Spatharou</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hieronimus</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Jenkins</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Transforming healthcare with AI: the impact on the workforce and organizations</article-title>
          <source>McKinsey &amp; Company</source>
          <year>2020</year>
          <month>3</month>
          <day>10</day>
          <access-date>2023-02-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mckinsey.com/industries/healthcare/our-insights/transforming-healthcare-with-ai">https://www.mckinsey.com/industries/healthcare/our-insights/transforming-healthcare-with-ai</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Esteva</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Chou</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Yeung</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Naik</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Madani</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mottaghi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Topol</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Dean</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Socher</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Deep learning-enabled medical computer vision</article-title>
          <source>NPJ Digit Med</source>
          <year>2021</year>
          <month>01</month>
          <day>08</day>
          <volume>4</volume>
          <issue>1</issue>
          <fpage>5</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41746-020-00376-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-020-00376-2</pub-id>
          <pub-id pub-id-type="medline">33420381</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41746-020-00376-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC7794558</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>LeCun</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Bengio</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hinton</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Deep learning</article-title>
          <source>Nature</source>
          <year>2015</year>
          <month>05</month>
          <day>28</day>
          <volume>521</volume>
          <issue>7553</issue>
          <fpage>436</fpage>
          <lpage>44</lpage>
          <pub-id pub-id-type="doi">10.1038/nature14539</pub-id>
          <pub-id pub-id-type="medline">26017442</pub-id>
          <pub-id pub-id-type="pii">nature14539</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Faes</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Wagner</surname>
              <given-names>SK</given-names>
            </name>
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Korot</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Ledsam</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Back</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Chopra</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Pontikos</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Kern</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Moraes</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Schmid</surname>
              <given-names>MK</given-names>
            </name>
            <name name-style="western">
              <surname>Sim</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Balaskas</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Bachmann</surname>
              <given-names>LM</given-names>
            </name>
            <name name-style="western">
              <surname>Denniston</surname>
              <given-names>AK</given-names>
            </name>
            <name name-style="western">
              <surname>Keane</surname>
              <given-names>PA</given-names>
            </name>
          </person-group>
          <article-title>Automated deep learning design for medical image classification by health-care professionals with no coding experience: a feasibility study</article-title>
          <source>Lancet Digit Health</source>
          <year>2019</year>
          <month>09</month>
          <volume>1</volume>
          <issue>5</issue>
          <fpage>e232</fpage>
          <lpage>e242</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2589-7500(19)30108-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S2589-7500(19)30108-6</pub-id>
          <pub-id pub-id-type="medline">33323271</pub-id>
          <pub-id pub-id-type="pii">S2589-7500(19)30108-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mnih</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Kavukcuoglu</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Silver</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Rusu</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Veness</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bellemare</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Graves</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Riedmiller</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Fidjeland</surname>
              <given-names>AK</given-names>
            </name>
            <name name-style="western">
              <surname>Ostrovski</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Petersen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Beattie</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Sadik</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Antonoglou</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>King</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Kumaran</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wierstra</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Legg</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hassabis</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Human-level control through deep reinforcement learning</article-title>
          <source>Nature</source>
          <year>2015</year>
          <month>02</month>
          <day>26</day>
          <volume>518</volume>
          <issue>7540</issue>
          <fpage>529</fpage>
          <lpage>33</lpage>
          <pub-id pub-id-type="doi">10.1038/nature14236</pub-id>
          <pub-id pub-id-type="medline">25719670</pub-id>
          <pub-id pub-id-type="pii">nature14236</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Suthaharan</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Support vector machine</article-title>
          <source>Machine Learning Models and Algorithms for Big Data Classification</source>
          <year>2016</year>
          <publisher-loc>Boston, MA</publisher-loc>
          <publisher-name>Springer</publisher-name>
          <fpage>207</fpage>
          <lpage>235</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Szegedy</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ioffe</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Vanhoucke</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Alemi</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Inception-v4, Inception-ResNet and the impact of residual connections on learning</article-title>
          <year>2017</year>
          <month>02</month>
          <day>12</day>
          <conf-name>Thirty-First AAAI Conference on Artificial Intelligence</conf-name>
          <conf-date>February 4-9, 2017</conf-date>
          <conf-loc>San Francisco, CA</conf-loc>
          <pub-id pub-id-type="doi">10.1609/aaai.v31i1.11231</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hara</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Kataoka</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Satoh</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Learning spatio-temporal features with 3D residual networks for action recognition</article-title>
          <year>2018</year>
          <month>1</month>
          <day>22</day>
          <conf-name>2017 IEEE International Conference on Computer Vision Workshops (ICCVW)</conf-name>
          <conf-date>October 22-29, 2017</conf-date>
          <conf-loc>Venice, Italy</conf-loc>
          <pub-id pub-id-type="doi">10.1109/iccvw.2017.373</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hara</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Kataoka</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Satoh</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Can spatiotemporal 3D CNNs retrace the history of 2D CNNs and ImageNet?</article-title>
          <year>2018</year>
          <month>12</month>
          <day>16</day>
          <conf-name>2018 IEEE/CVF Conference on Computer Vision and Pattern Recognition</conf-name>
          <conf-date>June 18-23, 2018</conf-date>
          <conf-loc>Salt Lake City, UT</conf-loc>
          <pub-id pub-id-type="doi">10.1109/cvpr.2018.00685</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Carreira</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zisserman</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Quo vadis, action recognition? a new model and the kinetics dataset</article-title>
          <year>2017</year>
          <month>11</month>
          <day>9</day>
          <conf-name>2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)</conf-name>
          <conf-date>July 21-26, 2017</conf-date>
          <conf-loc>Honolulu, HI</conf-loc>
          <pub-id pub-id-type="doi">10.1109/cvpr.2017.502</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bellemo</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Lim</surname>
              <given-names>ZW</given-names>
            </name>
            <name name-style="western">
              <surname>Lim</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>QD</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yip</surname>
              <given-names>MYT</given-names>
            </name>
            <name name-style="western">
              <surname>Hamzah</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Ho</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>XQ</given-names>
            </name>
            <name name-style="western">
              <surname>Hsu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Musonda</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Chandran</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chipalo-Mutati</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Muma</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Tan</surname>
              <given-names>GSW</given-names>
            </name>
            <name name-style="western">
              <surname>Sivaprasad</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Menon</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>TY</given-names>
            </name>
            <name name-style="western">
              <surname>Ting</surname>
              <given-names>DSW</given-names>
            </name>
          </person-group>
          <article-title>Artificial intelligence using deep learning to screen for referable and vision-threatening diabetic retinopathy in Africa: a clinical validation study</article-title>
          <source>Lancet Digit Health</source>
          <year>2019</year>
          <month>05</month>
          <volume>1</volume>
          <issue>1</issue>
          <fpage>e35</fpage>
          <lpage>e44</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2589-7500(19)30004-4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S2589-7500(19)30004-4</pub-id>
          <pub-id pub-id-type="medline">33323239</pub-id>
          <pub-id pub-id-type="pii">S2589-7500(19)30004-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Dong</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Socher</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>LJ</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>FF</given-names>
            </name>
          </person-group>
          <article-title>ImageNet: a large-scale hierarchical image database</article-title>
          <year>2009</year>
          <month>8</month>
          <day>18</day>
          <conf-name>2009 IEEE Conference on Computer Vision and Pattern Recognition</conf-name>
          <conf-date>June 20-25, 2009</conf-date>
          <conf-loc>Miami, FL</conf-loc>
          <pub-id pub-id-type="doi">10.1109/cvpr.2009.5206848</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kay</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Carreira</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Simonyan</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Hillier</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Vijayanarasimhan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Viola</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>The Kinetics Human Action Video Dataset</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on May 19, 2017</comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.1705.06950</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>He</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Ren</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Deep residual learning for image recognition</article-title>
          <year>2016</year>
          <month>12</month>
          <day>12</day>
          <conf-name>2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)</conf-name>
          <conf-date>June 27-30, 2016</conf-date>
          <conf-loc>Las Vegas, NV</conf-loc>
          <pub-id pub-id-type="doi">10.1109/cvpr.2016.90</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dalal</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Triggs</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Histograms of oriented gradients for human detection</article-title>
          <year>2005</year>
          <month>7</month>
          <day>25</day>
          <conf-name>2005 IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR'05)</conf-name>
          <conf-date>June 20-25, 2005</conf-date>
          <conf-loc>San Diego, CA</conf-loc>
          <pub-id pub-id-type="doi">10.1109/cvpr.2005.177</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hussain</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>What is cross validation in machine learning? types of cross validation</article-title>
          <source>Great Learning</source>
          <year>2020</year>
          <access-date>2023-02-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mygreatlearning.com/blog/cross-validation/">https://www.mygreatlearning.com/blog/cross-validation/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Khoshgoftaar</surname>
              <given-names>TM</given-names>
            </name>
          </person-group>
          <article-title>Robust thresholding strategies for highly imbalanced and noisy data</article-title>
          <year>2022</year>
          <month>1</month>
          <day>25</day>
          <conf-name>2021 20th IEEE International Conference on Machine Learning and Applications (ICMLA)</conf-name>
          <conf-date>December 13-16, 2021</conf-date>
          <conf-loc>Pasadena, CA</conf-loc>
          <pub-id pub-id-type="doi">10.1109/icmla52953.2021.00192</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Richens</surname>
              <given-names>JG</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Johri</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Improving the accuracy of medical diagnosis with causal machine learning</article-title>
          <source>Nat Commun</source>
          <year>2020</year>
          <month>08</month>
          <day>11</day>
          <volume>11</volume>
          <issue>1</issue>
          <fpage>3923</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41467-020-17419-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41467-020-17419-7</pub-id>
          <pub-id pub-id-type="medline">32782264</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41467-020-17419-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC7419549</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Diba</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Fayyaz</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sharma</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Arzani</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Yousefzadeh</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gall</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>van Gool</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Spatio-temporal channel correlation networks for action classification</article-title>
          <year>2018</year>
          <month>10</month>
          <day>6</day>
          <conf-name>Computer Vision – ECCV 2018: 15th European Conference</conf-name>
          <conf-date>September 8-14, 2018</conf-date>
          <conf-loc>Munich, Germany</conf-loc>
          <fpage>299</fpage>
          <lpage>315</lpage>
          <pub-id pub-id-type="doi">10.1007/978-3-030-01225-0_18</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Faes</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Kale</surname>
              <given-names>AU</given-names>
            </name>
            <name name-style="western">
              <surname>Wagner</surname>
              <given-names>SK</given-names>
            </name>
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Bruynseels</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mahendiran</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Moraes</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Shamdas</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kern</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ledsam</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Schmid</surname>
              <given-names>MK</given-names>
            </name>
            <name name-style="western">
              <surname>Balaskas</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Topol</surname>
              <given-names>EJ</given-names>
            </name>
            <name name-style="western">
              <surname>Bachmann</surname>
              <given-names>LM</given-names>
            </name>
            <name name-style="western">
              <surname>Keane</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Denniston</surname>
              <given-names>AK</given-names>
            </name>
          </person-group>
          <article-title>A comparison of deep learning performance against health-care professionals in detecting diseases from medical imaging: a systematic review and meta-analysis</article-title>
          <source>Lancet Digit Health</source>
          <year>2019</year>
          <month>10</month>
          <volume>1</volume>
          <issue>6</issue>
          <fpage>e271</fpage>
          <lpage>e297</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2589-7500(19)30123-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S2589-7500(19)30123-2</pub-id>
          <pub-id pub-id-type="medline">33323251</pub-id>
          <pub-id pub-id-type="pii">S2589-7500(19)30123-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kichloo</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Albosta</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dettloff</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Wani</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>El-Amir</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Aljadah</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chakinala</surname>
              <given-names>RC</given-names>
            </name>
            <name name-style="western">
              <surname>Kanugula</surname>
              <given-names>AK</given-names>
            </name>
            <name name-style="western">
              <surname>Solanki</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chugh</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Telemedicine, the current COVID-19 pandemic and the future: a narrative review and perspectives moving forward in the USA</article-title>
          <source>Fam Med Community Health</source>
          <year>2020</year>
          <month>08</month>
          <day>18</day>
          <volume>8</volume>
          <issue>3</issue>
          <fpage>e000530</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://fmch.bmj.com/lookup/pmidlookup?view=long&amp;pmid=32816942"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/fmch-2020-000530</pub-id>
          <pub-id pub-id-type="medline">32816942</pub-id>
          <pub-id pub-id-type="pii">fmch-2020-000530</pub-id>
          <pub-id pub-id-type="pmcid">PMC7437610</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Doraiswamy</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Abraham</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mamtani</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Cheema</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Use of telehealth during the COVID-19 pandemic: scoping review</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>12</month>
          <day>01</day>
          <volume>22</volume>
          <issue>12</issue>
          <fpage>e24087</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/12/e24087/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/24087</pub-id>
          <pub-id pub-id-type="medline">33147166</pub-id>
          <pub-id pub-id-type="pii">v22i12e24087</pub-id>
          <pub-id pub-id-type="pmcid">PMC7710390</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gulshan</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Coram</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Stumpe</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Narayanaswamy</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Venugopalan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Widner</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Madams</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Cuadros</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Raman</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Nelson</surname>
              <given-names>PC</given-names>
            </name>
            <name name-style="western">
              <surname>Mega</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Webster</surname>
              <given-names>DR</given-names>
            </name>
          </person-group>
          <article-title>Development and validation of a deep learning algorithm for detection of diabetic retinopathy in retinal fundus photographs</article-title>
          <source>JAMA</source>
          <year>2016</year>
          <month>12</month>
          <day>13</day>
          <volume>316</volume>
          <issue>22</issue>
          <fpage>2402</fpage>
          <lpage>2410</lpage>
          <pub-id pub-id-type="doi">10.1001/jama.2016.17216</pub-id>
          <pub-id pub-id-type="medline">27898976</pub-id>
          <pub-id pub-id-type="pii">2588763</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
