<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<?covid-19-tdm?>
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Med.</journal-id>
<journal-title>Frontiers in Medicine</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Med.</abbrev-journal-title>
<issn pub-type="epub">2296-858X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fmed.2023.1089087</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Medicine</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Validation of automated data abstraction for SCCM discovery VIRUS COVID-19 registry: practical EHR export pathways (VIRUS-PEEP)</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author"><name><surname>Valencia Morales</surname><given-names>Diana J.</given-names></name><xref rid="aff1" ref-type="aff"><sup>1</sup></xref>
<xref rid="fn0001" ref-type="author-notes"><sup>&#x2020;</sup></xref>
</contrib>
<contrib contrib-type="author"><name><surname>Bansal</surname><given-names>Vikas</given-names></name><xref rid="aff2" ref-type="aff"><sup>2</sup></xref>
<xref rid="fn0001" ref-type="author-notes"><sup>&#x2020;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/1110873/overview"/>
</contrib>
<contrib contrib-type="author"><name><surname>Heavner</surname><given-names>Smith F.</given-names></name><xref rid="aff3" ref-type="aff"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author"><name><surname>Castro</surname><given-names>Janna C.</given-names></name><xref rid="aff4" ref-type="aff"><sup>4</sup></xref>
</contrib>
<contrib contrib-type="author"><name><surname>Sharma</surname><given-names>Mayank</given-names></name><xref rid="aff1" ref-type="aff"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author"><name><surname>Tekin</surname><given-names>Aysun</given-names></name><xref rid="aff1" ref-type="aff"><sup>1</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/1103422/overview"/>
</contrib>
<contrib contrib-type="author"><name><surname>Bogojevic</surname><given-names>Marija</given-names></name><xref rid="aff1" ref-type="aff"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author"><name><surname>Zec</surname><given-names>Simon</given-names></name><xref rid="aff1" ref-type="aff"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author"><name><surname>Sharma</surname><given-names>Nikhil</given-names></name><xref rid="aff2" ref-type="aff"><sup>2</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/1661128/overview"/>
</contrib>
<contrib contrib-type="author"><name><surname>Cartin-Ceba</surname><given-names>Rodrigo</given-names></name><xref rid="aff5" ref-type="aff"><sup>5</sup></xref>
</contrib>
<contrib contrib-type="author"><name><surname>Nanchal</surname><given-names>Rahul S.</given-names></name><xref rid="aff6" ref-type="aff"><sup>6</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2284131/overview"/>
</contrib>
<contrib contrib-type="author"><name><surname>Sanghavi</surname><given-names>Devang K.</given-names></name><xref rid="aff7" ref-type="aff"><sup>7</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/1621637/overview"/>
</contrib>
<contrib contrib-type="author"><name><surname>La Nou</surname><given-names>Abigail T.</given-names></name><xref rid="aff8" ref-type="aff"><sup>8</sup></xref>
</contrib>
<contrib contrib-type="author"><name><surname>Khan</surname><given-names>Syed A.</given-names></name><xref rid="aff9" ref-type="aff"><sup>9</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2095273/overview"/>
</contrib>
<contrib contrib-type="author"><name><surname>Belden</surname><given-names>Katherine A.</given-names></name><xref rid="aff10" ref-type="aff"><sup>10</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/1821239/overview"/>
</contrib>
<contrib contrib-type="author"><name><surname>Chen</surname><given-names>Jen-Ting</given-names></name><xref rid="aff11" ref-type="aff"><sup>11</sup></xref>
</contrib>
<contrib contrib-type="author"><name><surname>Melamed</surname><given-names>Roman R.</given-names></name><xref rid="aff12" ref-type="aff"><sup>12</sup></xref>
</contrib>
<contrib contrib-type="author"><name><surname>Sayed</surname><given-names>Imran A.</given-names></name><xref rid="aff13" ref-type="aff"><sup>13</sup></xref>
</contrib>
<contrib contrib-type="author"><name><surname>Reilkoff</surname><given-names>Ronald A.</given-names></name><xref rid="aff14" ref-type="aff"><sup>14</sup></xref>
</contrib>
<contrib contrib-type="author"><name><surname>Herasevich</surname><given-names>Vitaly</given-names></name><xref rid="aff1" ref-type="aff"><sup>1</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/1208981/overview"/>
</contrib>
<contrib contrib-type="author"><name><surname>Domecq Garces</surname><given-names>Juan Pablo</given-names></name><xref rid="aff2" ref-type="aff"><sup>2</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/1249548/overview"/>
</contrib>
<contrib contrib-type="author"><name><surname>Walkey</surname><given-names>Allan J.</given-names></name><xref rid="aff15" ref-type="aff"><sup>15</sup></xref>
</contrib>
<contrib contrib-type="author"><name><surname>Boman</surname><given-names>Karen</given-names></name><xref rid="aff16" ref-type="aff"><sup>16</sup></xref>
</contrib>
<contrib contrib-type="author"><name><surname>Kumar</surname><given-names>Vishakha K.</given-names></name><xref rid="aff16" ref-type="aff"><sup>16</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2098647/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes"><name><surname>Kashyap</surname><given-names>Rahul</given-names></name><xref rid="aff1" ref-type="aff"><sup>1</sup></xref>
<xref rid="c001" ref-type="corresp"><sup>&#x002A;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/668768/overview"/>
</contrib>
<on-behalf-of>on behalf of Society of Critical Care Medicine&#x2019;s Discovery, the Critical Care Research Network</on-behalf-of>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Division of Critical Care Medicine, Department of Anesthesiology and Perioperative Care, Mayo Clinic</institution>, <addr-line>Rochester, MN</addr-line>, <country>United States</country></aff>
<aff id="aff2"><sup>2</sup><institution>Division of Nephrology and Critical Care Medicine, Department of Internal Medicine, Mayo Clinic</institution>, <addr-line>Rochester, MN</addr-line>, <country>United States</country></aff>
<aff id="aff3"><sup>3</sup><institution>CURE Drug Repurposing Collaboratory, Critical Path Institute</institution>, <addr-line>Tucson, AZ</addr-line>, <country>United States</country></aff>
<aff id="aff4"><sup>4</sup><institution>Department of Information Technology, Mayo Clinic</institution>, <addr-line>Scottsdale, AZ</addr-line>, <country>United States</country></aff>
<aff id="aff5"><sup>5</sup><institution>Division of Critical Care Medicine, Department of Pulmonary Medicine, Mayo Clinic</institution>, <addr-line>Scottsdale, AZ</addr-line>, <country>United States</country></aff>
<aff id="aff6"><sup>6</sup><institution>Division of Pulmonary and Critical Care Medicine, Department of Internal Medicine, Medical College of Wisconsin</institution>, <addr-line>Milwaukee, WI</addr-line>, <country>United States</country></aff>
<aff id="aff7"><sup>7</sup><institution>Department of Critical Care Medicine, Mayo Clinic Florida</institution>, <addr-line>Jacksonville, FL</addr-line>, <country>United States</country></aff>
<aff id="aff8"><sup>8</sup><institution>Department of Critical Care Medicine, Mayo Clinic Health System</institution>, <addr-line>Eau Claire, WI</addr-line>, <country>United States</country></aff>
<aff id="aff9"><sup>9</sup><institution>Department of Critical Care Medicine, Mayo Clinic Health System</institution>, <addr-line>Mankato, MN</addr-line>, <country>United States</country></aff>
<aff id="aff10"><sup>10</sup><institution>Division of Infectious Diseases, Sidney Kimmel Medical College at Thomas Jefferson University</institution>, <addr-line>Philadelphia, PA</addr-line>, <country>United States</country></aff>
<aff id="aff11"><sup>11</sup><institution>Division of Critical Care Medicine, Department of Internal Medicine, Montefiore Medical Center, Albert Einstein College of Medicine</institution>, <addr-line>Bronx, NY</addr-line>, <country>United States</country></aff>
<aff id="aff12"><sup>12</sup><institution>Department of Critical Care Medicine, Abbott Northwestern Hospital, Allina Health</institution>, <addr-line>Minneapolis, MN</addr-line>, <country>United States</country></aff>
<aff id="aff13"><sup>13</sup><institution>Department of Pediatrics, Children&#x2019;s Hospital of Colorado, University of Colorado Anschutz Medical Campus</institution>, <addr-line>Colorado Springs, CO</addr-line>, <country>United States</country></aff>
<aff id="aff14"><sup>14</sup><institution>Division of Pulmonary, Allergy, Critical Care and Sleep Medicine, Department of Internal Medicine, University of Minnesota Medical School</institution>, <addr-line>Edina, MN</addr-line>, <country>United States</country></aff>
<aff id="aff15"><sup>15</sup><institution>Division of Pulmonary, Allergy, Critical Care and Sleep Medicine, Department of Medicine, Evans Center of Implementation and Improvement Sciences, Boston University School of Medicine</institution>, <addr-line>Boston, MA</addr-line>, <country>United States</country></aff>
<aff id="aff16"><sup>16</sup><institution>Society of Critical Care Medicine</institution>, <addr-line>Mount Prospect, IL</addr-line>, <country>United States</country></aff>
<author-notes>
<fn fn-type="edited-by" id="fn0002">
<p>Edited by: Gulzar H. Shah, Georgia Southern University, United States</p>
</fn>
<fn fn-type="edited-by" id="fn0003">
<p>Reviewed by: Kristie Cason Waterfield, Georgia Southern University, United States; Hong Qin, University of Tennessee at Chattanooga, United States</p>
</fn>
<corresp id="c001">&#x002A;Correspondence: Rahul Kashyap, <email>kashyapmd@gmail.com</email></corresp>
<fn fn-type="equal" id="fn0001">
<p><sup>&#x2020;</sup>These authors have contributed equally to this work and share first authorship</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>04</day>
<month>10</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2023</year>
</pub-date>
<volume>10</volume>
<elocation-id>1089087</elocation-id>
<history>
<date date-type="received">
<day>20</day>
<month>04</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>14</day>
<month>09</month>
<year>2023</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2023 Valencia Morales, Bansal, Heavner, Castro, Sharma, Tekin, Bogojevic, Zec, Sharma, Cartin-Ceba, Nanchal, Sanghavi, La Nou, Khan, Belden, Chen, Melamed, Sayed, Reilkoff, Herasevich, Domecq Garces, Walkey, Boman, Kumar and Kashyap.</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Valencia Morales, Bansal, Heavner, Castro, Sharma, Tekin, Bogojevic, Zec, Sharma, Cartin-Ceba, Nanchal, Sanghavi, La Nou, Khan, Belden, Chen, Melamed, Sayed, Reilkoff, Herasevich, Domecq Garces, Walkey, Boman, Kumar and Kashyap.</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<sec id="sec1">
<title>Background</title>
<p>The gold standard for gathering data from electronic health records (EHR) has been manual data extraction; however, this requires vast resources and personnel. Automation of this process reduces resource burdens and expands research opportunities.</p>
</sec>
<sec id="sec2">
<title>Objective</title>
<p>This study aimed to determine the feasibility and reliability of automated data extraction in a large registry of adult COVID-19 patients.</p>
</sec>
<sec id="sec3">
<title>Materials and methods</title>
<p>This observational study included data from sites participating in the SCCM Discovery VIRUS COVID-19 registry. Important demographic, comorbidity, and outcome variables were chosen for manual and automated extraction for the feasibility dataset. We quantified the degree of agreement with Cohen&#x2019;s kappa statistics for categorical variables. The sensitivity and specificity were also assessed. Correlations for continuous variables were assessed with Pearson&#x2019;s correlation coefficient and Bland&#x2013;Altman plots. The strength of agreement was defined as almost perfect (0.81&#x2013;1.00), substantial (0.61&#x2013;0.80), and moderate (0.41&#x2013;0.60) based on kappa statistics. Pearson correlations were classified as trivial (0.00&#x2013;0.30), low (0.30&#x2013;0.50), moderate (0.50&#x2013;0.70), high (0.70&#x2013;0.90), and extremely high (0.90&#x2013;1.00).</p>
</sec>
<sec id="sec4">
<title>Measurements and main results</title>
<p>The cohort included 652 patients from 11 sites. The agreement between manual and automated extraction for categorical variables was almost perfect in 13 (72.2%) variables (Race, Ethnicity, Sex, Coronary Artery Disease, Hypertension, Congestive Heart Failure, Asthma, Diabetes Mellitus, ICU admission rate, IMV rate, HFNC rate, ICU and Hospital Discharge Status), and substantial in five (27.8%) (COPD, CKD, Dyslipidemia/Hyperlipidemia, NIMV, and ECMO rate). The correlations were extremely high in three (42.9%) variables (age, weight, and hospital LOS) and high in four (57.1%) of the continuous variables (Height, Days to ICU admission, ICU LOS, and IMV days). The average sensitivity and specificity for the categorical data were 90.7 and 96.9%, respectively.</p>
</sec>
<sec id="sec5">
<title>Conclusion and relevance</title>
<p>Our study confirms the feasibility and validity of an automated process to gather data from the EHR.</p>
</sec>
</abstract>
<kwd-group>
<kwd>validation</kwd>
<kwd>data automation</kwd>
<kwd>electronic health records</kwd>
<kwd>COVID-19</kwd>
<kwd>VIRUS COVID-19 registry</kwd>
</kwd-group>
<counts>
<fig-count count="2"/>
<table-count count="3"/>
<equation-count count="1"/>
<ref-count count="36"/>
<page-count count="10"/>
<word-count count="5846"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Intensive Care Medicine and Anesthesiology</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="sec6">
<title>Introduction</title>
<p>The pandemic of the coronavirus disease 2019 (COVID-19) has created a need to develop research resources rapidly (<xref ref-type="bibr" rid="ref1">1</xref>). In response to the global demand for robust multicenter clinical data regarding patient care and outcomes, the Society of Critical Care Medicine (SCCM) Discovery Viral Infection and Respiratory Illness Universal Study (VIRUS) COVID-19 registry was created early in the pandemic (<xref ref-type="bibr" rid="ref2 ref3 ref4">2&#x2013;4</xref>).</p>
<p>Due to the surging nature of pandemic waves, and the subsequent workload and staffing burdens, clinical researchers have encountered difficulty in engaging in rapid, reliable manual data extraction from the electronic health record (EHR) (<xref ref-type="bibr" rid="ref5">5</xref>). Manual chart review is the gold standard method for gathering data for retrospective research studies (<xref ref-type="bibr" rid="ref6">6</xref>, <xref ref-type="bibr" rid="ref7">7</xref>). This process, however, is time consuming and necessitates personnel resources not widely available at all institutions (<xref ref-type="bibr" rid="ref8">8</xref>, <xref ref-type="bibr" rid="ref9">9</xref>). Prior to the pandemic, automated data extraction from the EHR utilizing direct database queries was shown to be faster and less error-prone than manual data extraction (<xref ref-type="bibr" rid="ref8">8</xref>, <xref ref-type="bibr" rid="ref10">10</xref>). Nonetheless, data quality challenges related to high complexity or fragmentation of data across many EHR systems make automated extraction vulnerable (<xref ref-type="bibr" rid="ref11 ref12 ref13 ref14">11&#x2013;14</xref>). Both manual and automatic extraction rely on the EHR, which is an artifact with its own biases, mistakes, and subjectivity (<xref ref-type="bibr" rid="ref15 ref16 ref17 ref18 ref19 ref20">15&#x2013;20</xref>).</p>
<p>Although previous research has looked at these notions, the best methods for obtaining data from EHR systems for research still need to be discovered. In response, we sought to assess the feasibility, reliability, and validity of an automated data extraction process using data for the VIRUS COVID-19 registry.</p>
</sec>
<sec sec-type="methods" id="sec7">
<title>Methods</title>
<sec id="sec8">
<title>VIRUS COVID-19 registry</title>
<p>The SCCM Discovery VIRUS COVID-19 registry (Clinical Trials registration number: NCT04323787) is a multicenter, international database with over 80,000 patients from 306 health sites across 28 countries (<xref ref-type="bibr" rid="ref21">21</xref>). VIRUS COVID-19 registry is an ongoing prospective observational study that aims at real-time data gathering and analytics with a feedback loop to disseminate treatment and outcome knowledge to improve COVID-19 patient care (<xref ref-type="bibr" rid="ref3">3</xref>). The Mayo Clinic Institutional Review Board authorized the SCCM Discovery VIRUS COVID-19 registry as exempt on March 23, 2020 (IRB number: 20&#x2013;002610). No informed consent was deemed necessary for the study subjects. The procedures were followed in accordance with the Helsinki Declaration of 2013 (<xref ref-type="bibr" rid="ref22">22</xref>). Among the participating sites, 30 individual centers are collaborating to rapidly develop tools and resources to optimize EHR data collection. These efforts are led by the VIRUS Practical EHR Export Pathways group (VIRUS-PEEP).</p>
</sec>
<sec id="sec9">
<title>Data collection</title>
<p>The VIRUS COVID-19 registry has over 500 variables which represent the pandemic registry common data standards for critically ill patients adapted from the World Health Organization-International Severe Acute Respiratory and Emerging Infection Consortium (WHO-ISARIC) COVID-19 CRF v1.3 24 February 2020 (<xref ref-type="bibr" rid="ref23">23</xref>). The VIRUS-PEEP validation cohort was developed in an iterative, consensus process by a group of VIRUS COVID-19 registry primary investigators to explore the feasibility of an automation process at each site. The Validation cohort variable was internally validated with seven core VIRUS COVID-19 investigators and subsequently validated by the VIRUS-PEEP lead sites&#x2019; principal investigators. Because of the timeline, the cohort could not be externally validated. A purposeful representative sample of the 25 most clinically relevant variables from each category (Baseline demographic and clinical characteristics of patient and ICU and Hospital-related outcomes) were selected and prioritized for this study (<xref ref-type="bibr" rid="ref4">4</xref>). We focused on demographic data (age, sex, race, ethnicity, height, weight), comorbidities (coronary artery disease (CAD), hypertension (HTN), congestive heart failure (CHF), chronic obstructive pulmonary disease (COPD), asthma, chronic kidney disease (CKD), diabetes mellitus (DM), dyslipidemia/hyperlipidemia), and clinical outcomes (intensive care unit (ICU) admission, days to ICU admission, ICU length of stay (LOS), type of oxygenation requirement, extracorporeal membrane oxygenation (ECMO), ICU discharge status, hospital LOS, and in-hospital mortality).</p>
<p>To avoid data extraction errors, we utilized precise variable definitions [VIRUS COVID-19 registry code book, case report form (CRF), and Standard Operating Procedure (SOP)], which were already implemented in the registry and during the pilot phase of the automation implementation. Additionally, all manual and automation data extraction personnel were educated regarding the definitions and procedures needed to collect and report the data.</p>
</sec>
<sec id="sec10">
<title>System description</title>
<p>De-identified data were collected through Research Electronic Data Capture software (REDCap, version 8.11.11, Vanderbilt University, Nashville, Tennessee) at Mayo Clinic, Rochester, MN, United States (<xref ref-type="bibr" rid="ref24">24</xref>). The REDCap electronic data capture system is a secure, web-based application for research data capture that includes an intuitive interface for validated data entry; audit trails for tracking data manipulation and export procedures; automated export procedures for seamless data downloads to standard statistical packages; and a secure platform for importing data from external sources.</p>
</sec>
<sec id="sec11">
<title>Manual abstraction</title>
<p>The VIRUS PEEP group has implemented a comprehensive process for data extraction, which involves training manual data extractors. These data extractors are trained to identify, abstract, and collect patient data according to the project&#x2019;s SOP. During a patient&#x2019;s hospitalization, extractors follow them until discharge, ensuring that all relevant information is collected. The CRF used in this process includes two main sections: demographics and outcomes, composed of categorical and continuous variables. Extractors answer a mix of binary (&#x201C;yes&#x201D; or &#x201C;no&#x201D;) and checkbox (&#x201C;check all that apply&#x201D;) questions in the nominal variable portions of the CRF. They are instructed to avoid free text and use the prespecified units for continuous variables. In any disagreement, a trainer is always available for guidance and correction. It&#x2019;s important to note that the manual extractors are unaware of the automated data extraction results.</p>
</sec>
<sec id="sec12">
<title>Automated extraction</title>
<p>A package of Structured Query Language (SQL) scripts for the &#x201C;Epic Clarity&#x201D; database was developed at one institution and shared through the SCCM&#x2019;s Secure File Transfer Platform (SFTP) with participating sites. A second site offered peer coaching on the development and utility of end-user Epic&#x2122; reporting functions and how to adapt and modify the SQL scripts according to their EHR environment and security firewall. Other tools included R-Studio&#x2122; scripts, Microsoft Excel&#x2122; macros, STATA 16, and REDCap calculators for data quality checks at participating sites before data upload to VIRUS Registry REDCap. These tools were designed to aid in data extraction, data cleaning, and adherence to data quality rules as provided in VIRUS COVID-19 Registry SOPs. Institutions participated in weekly conference calls to discuss challenges and share successes in implementing automated data abstraction; additionally, lessons learned from adapting the SQL scripts and other data quality tools to their EHR environments were shared between individual sites and members of the VIRUS PEEP group.</p>
</sec>
<sec id="sec13">
<title>Statistical analysis</title>
<p>We summarized continuous variables of manual and automation process data using mean&#x2009;&#x00B1;&#x2009;SD and calculated mean difference and SE by matched pair analysis. Pearson correlation coefficients (PCCs) and 95% confidence intervals (CI) were generated for continuous data as a measure of inter-class dependability (<xref ref-type="bibr" rid="ref25">25</xref>). Pearson correlations were classified as trivial (0.00&#x2013;0.30), low (0.30&#x2013;0.50), moderate (0.50&#x2013;0.70), high (0.70&#x2013;0.90), and extremely high (0.90&#x2013;1.00) (<xref ref-type="bibr" rid="ref26">26</xref>). Bland&#x2013;Altman mean-difference plots for continuous variables were also provided to aid in the understanding of agreement (<xref ref-type="bibr" rid="ref27">27</xref>).</p>
<p>Percent agreements were determined for the data collected using each of the two extraction techniques in a categorical variable:<disp-formula id="E1">
<mml:math id="M1">
<mml:mfrac>
<mml:mrow>
<mml:mi mathvariant="italic">Number</mml:mi>
<mml:mspace width="0.25em"/>
<mml:mi mathvariant="italic">of</mml:mi>
<mml:mspace width="0.25em"/>
<mml:mi mathvariant="italic">patients</mml:mi>
<mml:mspace width="0.25em"/>
<mml:mi mathvariant="italic">categorized</mml:mi>
<mml:mspace width="0.25em"/>
<mml:mi mathvariant="italic">identically</mml:mi>
<mml:mspace width="0.25em"/>
<mml:mi>b</mml:mi>
<mml:mi>y</mml:mi>
<mml:mspace width="0.25em"/>
<mml:mi mathvariant="italic">both</mml:mi>
<mml:mspace width="0.25em"/>
<mml:mi mathvariant="italic">sources</mml:mi>
<mml:mspace width="0.25em"/>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">Total</mml:mi>
<mml:mspace width="0.25em"/>
<mml:mi mathvariant="italic">number</mml:mi>
<mml:mspace width="0.25em"/>
<mml:mi mathvariant="italic">of</mml:mi>
<mml:mspace width="0.25em"/>
<mml:mi mathvariant="italic">cases</mml:mi>
<mml:mspace width="0.25em"/>
<mml:mi mathvariant="italic">examined</mml:mi>
<mml:mspace width="0.25em"/>
<mml:mi>b</mml:mi>
<mml:mi>y</mml:mi>
<mml:mspace width="0.25em"/>
<mml:mi mathvariant="italic">both</mml:mi>
<mml:mspace width="0.25em"/>
<mml:mi mathvariant="italic">sources</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:math>
</disp-formula></p>
<p>The total number of agreeing outcomes divided by the total number of results is the summary agreement for each variable. For categorical variables we used Cohen&#x2019;s kappa coefficient (<xref ref-type="bibr" rid="ref28">28</xref>). We used the scale created by Landis et al. to establish the degree of agreement (<xref ref-type="bibr" rid="ref29">29</xref>). This scale is divided into almost perfect (&#x03F0;&#x2009;=&#x2009;0.81&#x2013;1.00), substantial (&#x03F0;&#x2009;=&#x2009;0.61&#x2013;0.80), moderate (&#x03F0;&#x2009;=&#x2009;0.41&#x2013;0.60), fair (&#x03F0;&#x2009;=&#x2009;0.21&#x2013;0.40), slight (&#x03F0;&#x2009;=&#x2009;0.00&#x2013;0.20), and poor (&#x03F0;&#x2009;&#x003C;&#x2009;0.00). Additionally, the sensitivity and specificity were calculated by comparing the results of the automated data extraction method to the results of the manual data extraction method (gold standard). The 95% confidence intervals were calculated using an exact test for proportions. We used JMP statistical software version 16.2 for all data analysis.</p>
</sec>
</sec>
<sec sec-type="results" id="sec14">
<title>Results</title>
<p>Our cohort consisted of data from 652 patients from 11 sites (<xref rid="fig1" ref-type="fig">Figure 1</xref>). A total of 25 variables were collected for each patient for manual and automated methods. Of these 25 variables, 16 (64.0%) were nominal, 7 (28.0%) were continuous, and 2 (8.0%) were ordinal variables.</p>
<fig position="float" id="fig1"><label>Figure 1</label>
<caption>
<p>Study flowchart.</p>
</caption>
<graphic xlink:href="fmed-10-1089087-g001.tif"/>
</fig>
<p><xref rid="tab1" ref-type="table">Table 1</xref> summarizes the continuous variables. The automated results for three variables (age, weight, and hospital LOS) showed &#x201C;extremely high&#x201D; correlation (PCC&#x2009;&#x003E;&#x2009;0.90) with the manual extraction results. The correlation was &#x201C;high&#x201D; (PCC 0.70&#x2013;0.90) for height, days to ICU admission, ICU LOS, and IMV days. <xref rid="fig2" ref-type="fig">Figure 2</xref> presents the Bland&#x2013;Altman plots for seven continuous variables.</p>
<table-wrap position="float" id="tab1"><label>Table 1</label>
<caption>
<p>Comparison of patients in automated versus manual reviews and measures of agreement for individual responses for continuous variables.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Variable name</th>
<th align="center" valign="top">Automation<break/>(Mean, SD)</th>
<th align="center" valign="top">Manual<break/>(Mean, SD)</th>
<th align="center" valign="top">Mean difference (SE)</th>
<th align="center" valign="top">Pearson interclass correlation coefficient (PCC), 95% CI</th>
<th align="left" valign="top">Strength of agreement based on PCC</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">Age, <italic>N</italic>&#x2009;=&#x2009;652</td>
<td align="center" valign="top">57.9 (21.9)</td>
<td align="center" valign="top">58.5 (19.9)</td>
<td align="center" valign="top">&#x2212;0.5 (0.3)</td>
<td align="center" valign="top">0.95 (0.94&#x2013;0.96)</td>
<td align="left" valign="top">Extremely High</td>
</tr>
<tr>
<td align="left" valign="top">Height, <italic>N</italic>&#x2009;=&#x2009;632</td>
<td align="center" valign="top">167.6 (15.6)</td>
<td align="center" valign="top">167 (17.2)</td>
<td align="center" valign="top">0.6 (0.3)</td>
<td align="center" valign="top">0.89 (0.87&#x2013;0.90)</td>
<td align="left" valign="top">High</td>
</tr>
<tr>
<td align="left" valign="top">Weight, <italic>N</italic>&#x2009;=&#x2009;632</td>
<td align="center" valign="top">87.2 (27)</td>
<td align="center" valign="top">88.4 (28.5)</td>
<td align="center" valign="top">&#x2212;1.2 (0.4)</td>
<td align="center" valign="top">0.94 (0.93&#x2013;0.95)</td>
<td align="left" valign="top">Extremely High</td>
</tr>
<tr>
<td align="left" valign="top">Hospital LOS, <italic>N</italic>&#x2009;=&#x2009;540</td>
<td align="center" valign="top">9.0 (9.1)</td>
<td align="center" valign="top">9.0 (9)</td>
<td align="center" valign="top">0.1 (0.1)</td>
<td align="center" valign="top">0.97 (0.96&#x2013;0.97)</td>
<td align="left" valign="top">Extremely High</td>
</tr>
<tr>
<td align="left" valign="top">Days to ICU admission, <italic>N</italic>&#x2009;=&#x2009;176</td>
<td align="center" valign="top">1.3 (3.3)</td>
<td align="center" valign="top">1.1 (2.6)</td>
<td align="center" valign="top">0.2 (0.1)</td>
<td align="center" valign="top">0.80 (0.74&#x2013;0.85)</td>
<td align="left" valign="top">High</td>
</tr>
<tr>
<td align="left" valign="top">ICU LOS, <italic>N</italic>&#x2009;=&#x2009;168</td>
<td align="center" valign="top">7.5 (9.3)</td>
<td align="center" valign="top">9.0 (10.5)</td>
<td align="center" valign="top">&#x2212;1.5 (0.4)</td>
<td align="center" valign="top">0.88 (0.85&#x2013;0.91)</td>
<td align="left" valign="top">High</td>
</tr>
<tr>
<td align="left" valign="top">IMV Days, <italic>N</italic>&#x2009;=&#x2009;71</td>
<td align="center" valign="top">9.7 (9.6)</td>
<td align="center" valign="top">11.6 (11.1)</td>
<td align="center" valign="top">&#x2212;1.9 (0.6)</td>
<td align="center" valign="top">0.88 (0.81&#x2013;0.92)</td>
<td align="left" valign="top">High</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>CI, Confidence interval; ICU, Intensive Care Unit; IMV, Invasive Mechanical Ventilation; LOS, Length of stay; PCC, Pearson Interclass Correlation Coefficient; SD, Standard deviation; SE, Standard error.</p>
</table-wrap-foot>
</table-wrap>
<fig position="float" id="fig2"><label>Figure 2</label>
<caption>
<p>Agreement between manual and PEEP (Bland&#x2013;Altman plot). <bold>(A)</bold> Age. <bold>(B)</bold> Weight. <bold>(C)</bold> Height. <bold>(D)</bold> Hospital Length of Stay. <bold>(E)</bold> Days to ICU admission. <bold>(F)</bold> ICU Length of Stay. <bold>(G)</bold> IMV Days.</p>
</caption>
<graphic xlink:href="fmed-10-1089087-g002.tif"/>
</fig>
<p><xref rid="tab2" ref-type="table">Tables 2</xref>, <xref rid="tab3" ref-type="table">3</xref> describe the ordinal and nominal variables. The agreement between manual and automated extraction was almost perfect in 13 (72.2%) of the studied variables, and substantial in five (27.8%). The comorbidity &#x201C;dyslipidemia/hyperlipidemia&#x201D; had the lowest degree of agreement (substantial, 0.61); however, overall percent agreement was high (86.9%). The only variable that showed a Kappa Coefficient equal to 1 was &#x201C;ICU-discharge status.&#x201D; The average Kappa Coefficient was 0.81 for the eight comorbidities collected and was 0.86 for outcomes variables, considered almost perfect. The automated electronic search strategy achieved an average sensitivity of 90.7% and a specificity of 96.9%. The sensitivity and specificity of each data-extraction method for all variables are presented in <xref rid="tab3" ref-type="table">Table 3</xref>.</p>
<table-wrap position="float" id="tab2"><label>Table 2</label>
<caption>
<p>Comparison of patients in automated versus manual reviews and measures of agreement for individual responses for categorical (ordinal) variables.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Variable name</th>
<th align="center" valign="top">Automated vs. manual, percent agreement</th>
<th align="center" valign="top">Kappa coefficient (95% CI, SE)</th>
<th align="left" valign="top">Strength of agreement based on Kappa coefficient</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">Race, <italic>N</italic>&#x2009;=&#x2009;652</td>
<td/>
<td align="center" valign="middle" rowspan="5">0.91 (0.88&#x2013;0.93, 0.01)</td>
<td align="left" valign="middle" rowspan="5">Almost perfect</td>
</tr>
<tr>
<td align="left" valign="top">White Caucasian</td>
<td align="center" valign="top">365/372 (98.1)</td>
</tr>
<tr>
<td align="left" valign="top">Black or African American</td>
<td align="center" valign="top">138/139 (99.3)</td>
</tr>
<tr>
<td align="left" valign="top">Others</td>
<td align="center" valign="top">111/141 (78.7)</td>
</tr>
<tr>
<td align="left" valign="top">Total</td>
<td align="center" valign="top">614/652 (94.2)</td>
</tr>
<tr>
<td align="left" valign="top">Ethnicity, <italic>N</italic>&#x2009;=&#x2009;652</td>
<td/>
<td align="center" valign="middle" rowspan="5">0.88 (0.84&#x2013;0.93, 0.02)</td>
<td align="left" valign="middle" rowspan="5">Almost perfect</td>
</tr>
<tr>
<td align="left" valign="top">Non-Hispanic</td>
<td align="center" valign="top">506/512 (98.8)</td>
</tr>
<tr>
<td align="left" valign="top">Hispanic</td>
<td align="center" valign="top">97/105 (92.4)</td>
</tr>
<tr>
<td align="left" valign="top">Unknown/Not applicable</td>
<td align="center" valign="top">23/35 (65.7)</td>
</tr>
<tr>
<td align="left" valign="top">Total</td>
<td align="center" valign="top">626/652 (96)</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>CI, Confidence interval; SE, Standard error.</p>
</table-wrap-foot>
</table-wrap>
<table-wrap position="float" id="tab3"><label>Table 3</label>
<caption>
<p>Comparison of patients in automated versus manual reviews and measures of agreement for individual responses for categorical (nominal) variables.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Variable name</th>
<th align="center" valign="top">Percent agreement, automated vs. manual</th>
<th align="center" valign="top">Sensitivity</th>
<th align="center" valign="top">Specificity</th>
<th align="center" valign="top">Kappa coefficient (95% CI, SE)</th>
<th align="left" valign="top">Strength of agreement based on Kappa coefficient</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">Sex, <italic>N</italic>&#x2009;=&#x2009;652</td>
<td/>
<td align="center" valign="top" rowspan="4">99.7</td>
<td align="center" valign="top" rowspan="4">99.7</td>
<td align="center" valign="top" rowspan="4">0.99 (0.99&#x2013;1.0, 0)</td>
<td align="left" valign="top" rowspan="4">Almost perfect</td>
</tr>
<tr>
<td align="left" valign="top">Male</td>
<td align="center" valign="top">359/360 (99.7)</td>
</tr>
<tr>
<td align="left" valign="top">Female</td>
<td align="center" valign="top">291/292 (99.7)</td>
</tr>
<tr>
<td align="left" valign="top">Total</td>
<td align="center" valign="top">650/652 (99.7)</td>
</tr>
<tr>
<td align="left" valign="top">Coronary artery disease, <italic>N</italic>&#x2009;=&#x2009;540</td>
<td/>
<td align="center" valign="top" rowspan="4">98.6</td>
<td align="center" valign="top" rowspan="4">97.4</td>
<td align="center" valign="top" rowspan="4">0.90 (0.85&#x2013;0.96, 0.03)</td>
<td align="left" valign="top" rowspan="4">Almost perfect</td>
</tr>
<tr>
<td align="left" valign="top">Yes</td>
<td align="center" valign="top">73/74 (98.6)</td>
</tr>
<tr>
<td align="left" valign="top">No</td>
<td align="center" valign="top">454/466 (97.4)</td>
</tr>
<tr>
<td align="left" valign="top">Total</td>
<td align="center" valign="top">527/540 (97.6)</td>
</tr>
<tr>
<td align="left" valign="top">Hypertension, <italic>N</italic>&#x2009;=&#x2009;540</td>
<td/>
<td align="center" valign="top" rowspan="4">92.0</td>
<td align="center" valign="top" rowspan="4">93.5</td>
<td align="center" valign="top" rowspan="4">0.85 (0.80&#x2013;0.89, 0.02)</td>
<td align="left" valign="top" rowspan="4">Almost perfect</td>
</tr>
<tr>
<td align="left" valign="top">Yes</td>
<td align="center" valign="top">298/324 (92.0)</td>
</tr>
<tr>
<td align="left" valign="top">No</td>
<td align="center" valign="top">202/216 (93.5)</td>
</tr>
<tr>
<td align="left" valign="top">Total</td>
<td align="center" valign="top">500/540 (92.6)</td>
</tr>
<tr>
<td align="left" valign="top">Congestive heart failure, <italic>N</italic>&#x2009;=&#x2009;540</td>
<td/>
<td align="center" valign="top" rowspan="4">88.0</td>
<td align="center" valign="top" rowspan="4">97.8</td>
<td align="center" valign="top" rowspan="4">0.82 (0.74&#x2013;0.90, 0.04)</td>
<td align="left" valign="top" rowspan="4">Almost perfect</td>
</tr>
<tr>
<td align="left" valign="top">Yes</td>
<td align="center" valign="top">44/50 (88)</td>
</tr>
<tr>
<td align="left" valign="top">No</td>
<td align="center" valign="top">479/490 (97.8)</td>
</tr>
<tr>
<td align="left" valign="top">Total</td>
<td align="center" valign="top">523/540 (96.9)</td>
</tr>
<tr>
<td align="left" valign="top">Chronic obstructive pulmonary disease, <italic>N</italic>&#x2009;=&#x2009;540</td>
<td/>
<td align="center" valign="top" rowspan="4">92.7</td>
<td align="center" valign="top" rowspan="4">96.3</td>
<td align="center" valign="top" rowspan="4">0.80 (0.72&#x2013;0.88, 0.04)</td>
<td align="left" valign="top" rowspan="4">Substantial</td>
</tr>
<tr>
<td align="left" valign="top">Yes</td>
<td align="center" valign="top">51/55 (92.7)</td>
</tr>
<tr>
<td align="left" valign="top">No</td>
<td align="center" valign="top">467/485 (96.3)</td>
</tr>
<tr>
<td align="left" valign="top">Total</td>
<td align="center" valign="top">518/540 (95.9)</td>
</tr>
<tr>
<td align="left" valign="top">Asthma, <italic>N</italic>&#x2009;=&#x2009;540</td>
<td/>
<td align="center" valign="top" rowspan="4">93.7</td>
<td align="center" valign="top" rowspan="4">95.8</td>
<td align="center" valign="top" rowspan="4">0.81 (0.73&#x2013;0.88, 0.04)</td>
<td align="left" valign="top" rowspan="4">Almost perfect</td>
</tr>
<tr>
<td align="left" valign="top">Yes</td>
<td align="center" valign="top">59/63 (93.7)</td>
</tr>
<tr>
<td align="left" valign="top">No</td>
<td align="center" valign="top">457/477 (95.8)</td>
</tr>
<tr>
<td align="left" valign="top">Total</td>
<td align="center" valign="top">516/540 (95.6)</td>
</tr>
<tr>
<td align="left" valign="top">Chronic kidney disease, <italic>N</italic>&#x2009;=&#x2009;540</td>
<td/>
<td align="center" valign="top" rowspan="4">81.2</td>
<td align="center" valign="top" rowspan="4">96.2</td>
<td align="center" valign="top" rowspan="4">0.79 (0.72&#x2013;0.85, 0.03)</td>
<td align="left" valign="top" rowspan="4">Substantial</td>
</tr>
<tr>
<td align="left" valign="top">Yes</td>
<td align="center" valign="top">95/117 (81.2)</td>
</tr>
<tr>
<td align="left" valign="top">No</td>
<td align="center" valign="top">407/423 (96.2)</td>
</tr>
<tr>
<td align="left" valign="top">Total</td>
<td align="center" valign="top">502/540 (93)</td>
</tr>
<tr>
<td align="left" valign="top">Diabetes mellitus, <italic>N</italic>&#x2009;=&#x2009;540</td>
<td/>
<td align="center" valign="top" rowspan="4">92.1</td>
<td align="center" valign="top" rowspan="4">96.3</td>
<td align="center" valign="top" rowspan="4">0.89 (0.85&#x2013;0.93, 0.02)</td>
<td align="left" valign="top" rowspan="4">Almost perfect</td>
</tr>
<tr>
<td align="left" valign="top">Yes</td>
<td align="center" valign="top">176/191 (92.1)</td>
</tr>
<tr>
<td align="left" valign="top">No</td>
<td align="center" valign="top">336/349 (96.3)</td>
</tr>
<tr>
<td align="left" valign="top">Total</td>
<td align="center" valign="top">512/540 (94.8)</td>
</tr>
<tr>
<td align="left" valign="top">Dyslipidemia/Hyperlipidemia, <italic>N</italic>&#x2009;=&#x2009;540</td>
<td/>
<td align="center" valign="top" rowspan="4">88.9</td>
<td align="center" valign="top" rowspan="4">86.4</td>
<td align="center" valign="top" rowspan="4">0.61 (0.53&#x2013;0.69, 0.04)</td>
<td align="left" valign="top" rowspan="4">Substantial</td>
</tr>
<tr>
<td align="left" valign="top">Yes</td>
<td align="center" valign="top">80/90 (88.9)</td>
</tr>
<tr>
<td align="left" valign="top">No</td>
<td align="center" valign="top">389/450 (86.4)</td>
</tr>
<tr>
<td align="left" valign="top">Total</td>
<td align="center" valign="top">469/540 (86.9)</td>
</tr>
<tr>
<td align="left" valign="top">ICU admission rate, <italic>N</italic>&#x2009;=&#x2009;611</td>
<td/>
<td align="center" valign="top" rowspan="4">90.3</td>
<td align="center" valign="top" rowspan="4">95.2</td>
<td align="center" valign="top" rowspan="4">0.86 (0.82&#x2013;0.90, 0.02)</td>
<td align="left" valign="top" rowspan="4">Almost perfect</td>
</tr>
<tr>
<td align="left" valign="top">Yes</td>
<td align="center" valign="top">215/238 (90.3)</td>
</tr>
<tr>
<td align="left" valign="top">No</td>
<td align="center" valign="top">355/373 (95.2)</td>
</tr>
<tr>
<td align="left" valign="top">Total</td>
<td align="center" valign="top">570/611 (93.3)</td>
</tr>
<tr>
<td align="left" valign="top">IMV rate, <italic>N</italic>&#x2009;=&#x2009;582</td>
<td/>
<td align="center" valign="top" rowspan="4">87.7</td>
<td align="center" valign="top" rowspan="4">98</td>
<td align="center" valign="top" rowspan="4">0.85 (0.79&#x2013;0.92, 0.03)</td>
<td align="left" valign="top" rowspan="4">Almost perfect</td>
</tr>
<tr>
<td align="left" valign="top">Yes</td>
<td align="center" valign="top">64/73 (87.7)</td>
</tr>
<tr>
<td align="left" valign="top">No</td>
<td align="center" valign="top">499/509 (98)</td>
</tr>
<tr>
<td align="left" valign="top">Total</td>
<td align="center" valign="top">563/582 (96.7)</td>
</tr>
<tr>
<td align="left" valign="top">NIMV rate, <italic>N</italic>&#x2009;=&#x2009;581</td>
<td/>
<td align="center" valign="top" rowspan="4">83.3</td>
<td align="center" valign="top" rowspan="4">99.3</td>
<td align="center" valign="top" rowspan="4">0.80 (0.66&#x2013;0.95, 0.07)</td>
<td align="left" valign="top" rowspan="4">Substantial</td>
</tr>
<tr>
<td align="left" valign="top">Yes</td>
<td align="center" valign="top">15/18 (83.3)</td>
</tr>
<tr>
<td align="left" valign="top">No</td>
<td align="center" valign="top">559/563 (99.3)</td>
</tr>
<tr>
<td align="left" valign="top">Total</td>
<td align="center" valign="top">574/581 (98.8)</td>
</tr>
<tr>
<td align="left" valign="top">HFNC rate, <italic>N</italic>&#x2009;=&#x2009;581</td>
<td/>
<td align="center" valign="top" rowspan="4">100</td>
<td align="center" valign="top" rowspan="4">98.9</td>
<td align="center" valign="top" rowspan="4">0.86 (0.75&#x2013;0.97, 0.06)</td>
<td align="left" valign="top" rowspan="4">Almost perfect</td>
</tr>
<tr>
<td align="left" valign="top">Yes</td>
<td align="center" valign="top">19/19 (100)</td>
</tr>
<tr>
<td align="left" valign="top">No</td>
<td align="center" valign="top">556/562 (98.9)</td>
</tr>
<tr>
<td align="left" valign="top">Total</td>
<td align="center" valign="top">575/581 (99)</td>
</tr>
<tr>
<td align="left" valign="top">ECMO rate, <italic>N</italic>&#x2009;=&#x2009;581</td>
<td/>
<td align="center" valign="top" rowspan="4">72.7</td>
<td align="center" valign="top" rowspan="4">99.3</td>
<td align="center" valign="top" rowspan="4">0.69 (0.47&#x2013;0.91, 0.11)</td>
<td align="left" valign="top" rowspan="4">Substantial</td>
</tr>
<tr>
<td align="left" valign="top">Yes</td>
<td align="center" valign="top">8/11 (72.7)</td>
</tr>
<tr>
<td align="left" valign="top">No</td>
<td align="center" valign="top">566/570 (99.3)</td>
</tr>
<tr>
<td align="left" valign="top">Total</td>
<td align="center" valign="top">574/581 (98.8)</td>
</tr>
<tr>
<td align="left" valign="top">ICU discharge status, <italic>N</italic>&#x2009;=&#x2009;172</td>
<td/>
<td align="center" valign="top" rowspan="4">100</td>
<td align="center" valign="top" rowspan="4">100</td>
<td align="center" valign="top" rowspan="4">1.0 (1&#x2013;1, 0.0)</td>
<td align="left" valign="top" rowspan="4">Almost perfect</td>
</tr>
<tr>
<td align="left" valign="top">Death</td>
<td align="center" valign="top">9/9 (100)</td>
</tr>
<tr>
<td align="left" valign="top">Alive</td>
<td align="center" valign="top">163/163 (100)</td>
</tr>
<tr>
<td align="left" valign="top">Total</td>
<td align="center" valign="top">172/172 (100)</td>
</tr>
<tr>
<td align="left" valign="top">Hospital discharge status, <italic>N</italic>&#x2009;=&#x2009;541</td>
<td/>
<td align="center" valign="top" rowspan="4">90</td>
<td align="center" valign="top" rowspan="4">100</td>
<td align="center" valign="top" rowspan="4">0.94 (0.88&#x2013;1, 0.03)</td>
<td align="left" valign="top" rowspan="4">Almost perfect</td>
</tr>
<tr>
<td align="left" valign="top">Death</td>
<td align="center" valign="top">27/30 (90)</td>
</tr>
<tr>
<td align="left" valign="top">Alive</td>
<td align="center" valign="top">511/511 (100)</td>
</tr>
<tr>
<td align="left" valign="top">Total</td>
<td align="center" valign="top">538/541 (99.4)</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>CI, Confidence interval; ECMO, Extracorporeal membrane oxygenation; HFNC, High Flow Nasal Cannula; ICU, Intensive Care Unit; IMV, Invasive Mechanical Ventilation; LOS, Length of stay; NIMV, Non-Invasive Mechanical Ventilation; PCC, Pearson Interclass Correlation Coefficient; SE, Standard Error.</p>
</table-wrap-foot>
</table-wrap>
</sec>
<sec sec-type="discussions" id="sec15">
<title>Discussion</title>
<p>The automated search strategy for EHR data extraction was highly feasible and reliable. Our investigation observed substantial and almost perfect agreement between automated and manual data extraction. There was almost perfect agreement in two-thirds of the categorical variables, and all continuous variables showed Extremely High or High agreement.</p>
<p>The results of our validation study are similar to other studies that validated and evaluated automated data (<xref ref-type="bibr" rid="ref30 ref31 ref32 ref33">30&#x2013;33</xref>). Singh et al. (<xref ref-type="bibr" rid="ref31">31</xref>) developed several algorithm queries to identify every component of the Charlson Comorbidity Index and found median sensitivity and specificity of 98&#x2013;100% and 98&#x2013;100%, respectively. In the validation cohort, the sensitivity of the automated digital algorithm ranged from 91 to 100%, and the specificity ranged from 98 to 100% compared to ICD-9 codes. These results are comparable to our study as the comorbidities analyzed presented a sensitivity and specificity of 90.2 and 96.8%, respectively. Our results are superior to the results of Schaerfer et al. (<xref ref-type="bibr" rid="ref34">34</xref>), who found a sensitivity of 72% and a specificity of 95% for comorbidities (CHF, cerebral vascular disease, CKD, cancer, DM, human immunodeficiency virus, HTN) in patients with COVID-19 pneumonia using ICD-10-based data compared with manual data collection. We also successfully compared seven continuous variables, three showing extremely high agreement and four showing high agreement, in comparison to Brazeal et al. (<xref ref-type="bibr" rid="ref35">35</xref>), who compared two variables (age and BMI) for manual versus automation in a study population comprised of patients with histologically confirmed advanced adenomatous colorectal polyp.</p>
<p>Manual data extractors can overcome diverse interface issues, read and analyze free text, and provide clinical judgment when retrieving and interpreting data; however, manual data extraction is limited by human resources and is prone to human error (<xref ref-type="bibr" rid="ref7">7</xref>, <xref ref-type="bibr" rid="ref32">32</xref>, <xref ref-type="bibr" rid="ref36">36</xref>). In addition to requiring a considerable amount of time, manual data extraction also necessitates qualified personnel (<xref ref-type="bibr" rid="ref30">30</xref>, <xref ref-type="bibr" rid="ref33">33</xref>). During the COVID-19 pandemic, where real-time data is paramount, automated data has proven validity and efficacy, and may divert personnel to patient care and other vital tasks. Nonetheless, automated data is not flawless. A significant limitation is finding a unique algorithm that can be applied to every center. Variables collected as free text fields are another challenge for such validations. The automated VIRUS COVID-19 sites have reported a large majority of variables collected using this method. Currently, more than 60,000 patients and their data variables in the registry have been collected through efforts of the VIRUS-PEEP group, which has allowed for updates and complete data in the shortest possible time.</p>
<sec id="sec16">
<title>Challenges in automation</title>
<p>The environment for data collection is often a shared environment within an institution, and there are limitations on how much data may be extracted and processed in one job and how much post-abstraction processing is necessary. Microsoft SQL and TSQL solutions process substantial amounts of data from many different tables and can take a long time to run on large populations. There are clinical documentation differences between the various sites requiring additional coding when applying the data requirements and rules. Establishing logic for data elements within a given EHR can be time consuming up front, requiring close collaboration between clinician and analytics teams. Data may be stored differently between multiple medical centers in one institution, requiring processing to comply with data requirements for standardization. While sites can share coding experience in data abstraction between sites with similar data storage structures, variable coding schemes pose challenges for direct translation between sites. Lastly, one information technology employee often works on such projects with competing priorities.</p>
</sec>
<sec id="sec17">
<title>Strengths and limitations</title>
<p>To our knowledge, this is the first multicenter study to evaluate the feasibility of an automation process during the COVID-19 pandemic. This automation process should be applicable to any EHR vendor (EHR type agnostic), and these purposeful sampled representative data points would be relevant to any other clinical study/trial, which is a major strength of this study. Nonparticipation of 19 sites out of 30 sites in the VIRUS-PEEP group, which leads to a possibility of selection bias, is a major limitation. The time constraints in the ongoing pandemic at participating sites were the reason behind this non-participation in the validation process. However, extracting data across 11 different centers is one of the strengths of this study; it could also highlight the variations in staff, procedures, and patients at these institutions. Although the SQL queries could be applicable in most sites, some sites required a new SQL tailored to their data architecture. One key limitation for our group was that all sites found a portion of data extraction that could not be automated, including variables which are described in narrative, such as patient symptoms, estimated duration of onset of symptoms, and imaging interpretations. Another limitation is a notable discrepancy between manual and EMR extraction for important outcomes like ICU LOS and IMV days. The automation process relies on procedure order date (intubation/extubation) and ADT (hospital/ICU admission discharge transfer) order date and time and discontinuation date in EHR; however, the manual extractors look for first-time documented ICU or IMV in the EHR, which probably could account for such notable discrepancy in outcomes like ICU LOS and IMV days. Transferring a patient to a location that was not a usual ICU due to COVID-19 surge may be another possible explanation for the observed lower sensitivity of ICU admission rate. 
Variation in creation of make-shift ICUs at different institutions may have caused this discrepancy in automation of ICU admissions documentation. It partially explains the lower sensitivity and high specificity of ICU admission, IMV, NIMV, and ECMO rates by the automation process. Another noticeable issue was that the manual data extraction was done in real time and automation was done when the patient was discharged and mainly relied on billing codes and manually verified data available in EHR.</p>
</sec>
<sec id="sec18">
<title>Future direction</title>
<p>Future research on this topic could involve a thorough comparison of all patient records extracted using two methods: manual extraction and automated SQL queries. The data comparison could be done by aligning data points across a wide range of variables for each data extraction method and then statistically analyzing their consistency and discrepancies. This detailed comparison would verify the reliability of automated data extraction and provide insights into areas that could be improved for greater accuracy.</p>
</sec>
</sec>
<sec sec-type="conclusions" id="sec19">
<title>Conclusion</title>
<p>This study confirms the feasibility, reliability, and validity of an automated process to gather data from the EHR. The use of automated data is comparable to the gold standard. The utilization of automated data extraction provides additional solutions when a rapid and large volume of patient data needs to be extracted.</p>
</sec>
<sec sec-type="data-availability" id="sec20">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material, further inquiries can be directed to the corresponding author.</p>
</sec>
<sec id="sec21" sec-type="ethics-statement">
<title>Ethics statement</title>
<p>The studies involving humans were approved by Mayo Clinic Institutional Review Board. The studies were conducted in accordance with the local legislation and institutional requirements. The ethics committee/institutional review board waived the requirement of written informed consent for participation from the participants or the participants&#x2019; legal guardians/next of kin because the Mayo Clinic Institutional Review Board authorized the SCCM Discovery VIRUS COVID-19 registry as exempt on March 23, 2020 (IRB number: 20-002610). No informed consent was deemed necessary for the study subjects. The procedures were followed in accordance with the Helsinki Declaration of 2013.</p>
</sec>
<sec id="sec22">
<title>Author contributions</title>
<p>DV and VB contributed equally in defining the study outline and manuscript writing. VB, SH, JC, MS, AT, MB, SZ, NS, RC-C, RN, DS, AN, SK, KAB, J-TC, RM, IS, RR, and KB did the data review and collection. DV, VB, and SH did the statistical analysis. VH, JD, AW, VK, and RK did the study design and critical review. DV, VB, SH, and RK were guarantors of the manuscript and took responsibility for the integrity of the work as a whole, from inception to published article. All authors contributed to the article and approved the submitted version.</p>
</sec>
</body>
<back>
<sec sec-type="funding-information" id="sec25">
<title>Funding</title>
<p>The VIRUS: COVID-19 Registry was supported, in part, by the Gordon and Betty Moore Foundation, and Janssen Research &#x0026; Development, LLC. The funders had no role in data gathering, analysis, interpretation, or writing.</p>
</sec>
<ack>
<p>Data from this study were submitted and presented in abstract format at the Chest 2023 Conference at the Hawai&#x02BB;i Convention Center, Honolulu, Hawai&#x02BB;i.</p>
</ack>
<sec sec-type="COI-statement" id="sec23">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="sec100" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="ref1"><label>1.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname><given-names>C</given-names></name> <name><surname>Horby</surname><given-names>PW</given-names></name> <name><surname>Hayden</surname><given-names>FG</given-names></name> <name><surname>Gao</surname><given-names>GF</given-names></name></person-group>. <article-title>A novel coronavirus outbreak of global health concern</article-title>. <source>Lancet</source>. (<year>2020</year>) <volume>395</volume>:<fpage>470</fpage>&#x2013;<lpage>3</lpage>. doi: <pub-id pub-id-type="doi">10.1016/S0140-6736(20)30185-9</pub-id>, PMID: <pub-id pub-id-type="pmid">31986257</pub-id></citation></ref>
<ref id="ref2"><label>2.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Domecq</surname><given-names>JP</given-names></name> <name><surname>Lal</surname><given-names>A</given-names></name> <name><surname>Sheldrick</surname><given-names>CR</given-names></name> <name><surname>Kumar</surname><given-names>VK</given-names></name> <name><surname>Boman</surname><given-names>K</given-names></name> <name><surname>Bolesta</surname><given-names>S</given-names></name> <etal/></person-group>. <article-title>Outcomes of patients with coronavirus disease 2019 receiving organ support therapies: the international viral infection and respiratory illness universal study registry</article-title>. <source>Crit Care Med</source>. (<year>2021</year>) <volume>49</volume>:<fpage>437</fpage>&#x2013;<lpage>48</lpage>. doi: <pub-id pub-id-type="doi">10.1097/CCM.0000000000004879</pub-id>, PMID: <pub-id pub-id-type="pmid">33555777</pub-id></citation></ref>
<ref id="ref3"><label>3.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Walkey</surname><given-names>AJ</given-names></name> <name><surname>Kumar</surname><given-names>VK</given-names></name> <name><surname>Harhay</surname><given-names>MO</given-names></name> <name><surname>Bolesta</surname><given-names>S</given-names></name> <name><surname>Bansal</surname><given-names>V</given-names></name> <name><surname>Gajic</surname><given-names>O</given-names></name> <etal/></person-group>. <article-title>The viral infection and respiratory illness universal study (VIRUS): an international registry of coronavirus 2019-related critical illness</article-title>. <source>Crit Care Explor</source>. (<year>2020</year>) <volume>2</volume>:<fpage>e0113</fpage>. doi: <pub-id pub-id-type="doi">10.1097/CCE.0000000000000113</pub-id>, PMID: <pub-id pub-id-type="pmid">32426754</pub-id></citation></ref>
<ref id="ref4"><label>4.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Walkey</surname><given-names>AJ</given-names></name> <name><surname>Sheldrick</surname><given-names>RC</given-names></name> <name><surname>Kashyap</surname><given-names>R</given-names></name> <name><surname>Kumar</surname><given-names>VK</given-names></name> <name><surname>Boman</surname><given-names>K</given-names></name> <name><surname>Bolesta</surname><given-names>S</given-names></name> <etal/></person-group>. <article-title>Guiding principles for the conduct of observational critical care research for coronavirus disease 2019 pandemics and beyond: the Society of Critical Care Medicine discovery viral infection and respiratory illness universal study registry</article-title>. <source>Crit Care Med</source>. (<year>2020</year>) <volume>48</volume>:<fpage>e1038</fpage>&#x2013;<lpage>44</lpage>. doi: <pub-id pub-id-type="doi">10.1097/CCM.0000000000004572</pub-id>, PMID: <pub-id pub-id-type="pmid">32932348</pub-id></citation></ref>
<ref id="ref5"><label>5.</label> <citation citation-type="other"><person-group person-group-type="author"><name><surname>Grimm</surname><given-names>AG</given-names></name></person-group>. <article-title>Hospitals Reported That the COVID-19 Pandemic Has Significantly Strained Health Care Delivery Results of a National Pulse Survey</article-title>. <publisher-loc>USA</publisher-loc>: <publisher-name>U.S. Department of Health and Human Services Office of Inspector General</publisher-name>. (<year>2021</year>). Available at: <ext-link xlink:href="https://oig.hhs.gov/oei/reports/OEI-09-21-00140.pdf" ext-link-type="uri">https://oig.hhs.gov/oei/reports/OEI-09-21-00140.pdf</ext-link></citation></ref>
<ref id="ref6"><label>6.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Vassar</surname><given-names>M</given-names></name> <name><surname>Holzmann</surname><given-names>M</given-names></name></person-group>. <article-title>The retrospective chart review: important methodological considerations</article-title>. <source>J Educ Eval Health Prof</source>. (<year>2013</year>) <volume>10</volume>:<fpage>12</fpage>. doi: <pub-id pub-id-type="doi">10.3352/jeehp.2013.10.12</pub-id>, PMID: <pub-id pub-id-type="pmid">24324853</pub-id></citation></ref>
<ref id="ref7"><label>7.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yin</surname><given-names>AL</given-names></name> <name><surname>Guo</surname><given-names>WL</given-names></name> <name><surname>Sholle</surname><given-names>ET</given-names></name> <name><surname>Rajan</surname><given-names>M</given-names></name> <name><surname>Alshak</surname><given-names>MN</given-names></name> <name><surname>Choi</surname><given-names>JJ</given-names></name> <etal/></person-group>. <article-title>Comparing automated vs. manual data collection for COVID-specific medications from electronic health records</article-title>. <source>Int J Med Inform</source>. (<year>2022</year>) <volume>157</volume>:<fpage>104622</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.ijmedinf.2021.104622</pub-id>, PMID: <pub-id pub-id-type="pmid">34741892</pub-id></citation></ref>
<ref id="ref8"><label>8.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Byrne</surname><given-names>MD</given-names></name> <name><surname>Jordan</surname><given-names>TR</given-names></name> <name><surname>Welle</surname><given-names>T</given-names></name></person-group>. <article-title>Comparison of manual versus automated data collection method for an evidence-based nursing practice study</article-title>. <source>Appl Clin Inform</source>. (<year>2013</year>) <volume>4</volume>:<fpage>61</fpage>&#x2013;<lpage>74</lpage>. doi: <pub-id pub-id-type="doi">10.4338/ACI-2012-09-RA-0037</pub-id>, PMID: <pub-id pub-id-type="pmid">23650488</pub-id></citation></ref>
<ref id="ref9"><label>9.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lan</surname><given-names>H</given-names></name> <name><surname>Thongprayoon</surname><given-names>C</given-names></name> <name><surname>Ahmed</surname><given-names>A</given-names></name> <name><surname>Herasevich</surname><given-names>V</given-names></name> <name><surname>Sampathkumar</surname><given-names>P</given-names></name> <name><surname>Gajic</surname><given-names>O</given-names></name> <etal/></person-group>. <article-title>Automating quality metrics in the era of electronic medical records: digital signatures for ventilator bundle compliance</article-title>. <source>Biomed Res Int</source>. (<year>2015</year>) <volume>2015</volume>:<fpage>396508</fpage>:<fpage>1</fpage>&#x2013;<lpage>6</lpage>. doi: <pub-id pub-id-type="doi">10.1155/2015/396508</pub-id></citation></ref>
<ref id="ref10"><label>10.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Brundin-Mather</surname><given-names>R</given-names></name> <name><surname>Soo</surname><given-names>A</given-names></name> <name><surname>Zuege</surname><given-names>DJ</given-names></name> <name><surname>Niven</surname><given-names>DJ</given-names></name> <name><surname>Fiest</surname><given-names>K</given-names></name> <name><surname>Doig</surname><given-names>CJ</given-names></name> <etal/></person-group>. <article-title>Secondary EMR data for quality improvement and research: a comparison of manual and electronic data collection from an integrated critical care electronic medical record system</article-title>. <source>J Crit Care</source>. (<year>2018</year>) <volume>47</volume>:<fpage>295</fpage>&#x2013;<lpage>301</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jcrc.2018.07.021</pub-id>, PMID: <pub-id pub-id-type="pmid">30099330</pub-id></citation></ref>
<ref id="ref11"><label>11.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hersh</surname><given-names>WR</given-names></name> <name><surname>Cimino</surname><given-names>J</given-names></name> <name><surname>Payne</surname><given-names>PR</given-names></name> <name><surname>Embi</surname><given-names>P</given-names></name> <name><surname>Logan</surname><given-names>J</given-names></name> <name><surname>Weiner</surname><given-names>M</given-names></name> <etal/></person-group>. <article-title>Recommendations for the use of operational electronic health record data in comparative effectiveness research</article-title>. <source>EGEMS</source>. (<year>2013</year>) <volume>1</volume>:<fpage>1018</fpage>. doi: <pub-id pub-id-type="doi">10.13063/2327-9214.1018</pub-id></citation></ref>
<ref id="ref12"><label>12.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hersh</surname><given-names>WR</given-names></name> <name><surname>Weiner</surname><given-names>MG</given-names></name> <name><surname>Embi</surname><given-names>PJ</given-names></name> <name><surname>Logan</surname><given-names>JR</given-names></name> <name><surname>Payne</surname><given-names>PR</given-names></name> <name><surname>Bernstam</surname><given-names>EV</given-names></name> <etal/></person-group>. <article-title>Caveats for the use of operational electronic health record data in comparative effectiveness research</article-title>. <source>Med Care</source>. (<year>2013</year>) <volume>51</volume>:<fpage>S30</fpage>&#x2013;<lpage>7</lpage>. doi: <pub-id pub-id-type="doi">10.1097/MLR.0b013e31829b1dbd</pub-id>, PMID: <pub-id pub-id-type="pmid">23774517</pub-id></citation></ref>
<ref id="ref13"><label>13.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kahn</surname><given-names>MG</given-names></name> <name><surname>Callahan</surname><given-names>TJ</given-names></name> <name><surname>Barnard</surname><given-names>J</given-names></name> <name><surname>Bauck</surname><given-names>AE</given-names></name> <name><surname>Brown</surname><given-names>J</given-names></name> <name><surname>Davidson</surname><given-names>BN</given-names></name> <etal/></person-group>. <article-title>A harmonized data quality assessment terminology and framework for the secondary use of electronic health record data</article-title>. <source>EGEMS</source>. (<year>2016</year>) <volume>4</volume>:<fpage>1244</fpage>. doi: <pub-id pub-id-type="doi">10.13063/2327-9214.1244</pub-id>, PMID: <pub-id pub-id-type="pmid">27713905</pub-id></citation></ref>
<ref id="ref14"><label>14.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wei</surname><given-names>WQ</given-names></name> <name><surname>Leibson</surname><given-names>CL</given-names></name> <name><surname>Ransom</surname><given-names>JE</given-names></name> <name><surname>Kho</surname><given-names>AN</given-names></name> <name><surname>Caraballo</surname><given-names>PJ</given-names></name> <name><surname>Chai</surname><given-names>HS</given-names></name> <etal/></person-group>. <article-title>Impact of data fragmentation across healthcare centers on the accuracy of a high-throughput clinical phenotyping algorithm for specifying subjects with type 2 diabetes mellitus</article-title>. <source>J Am Med Inform Assoc</source>. (<year>2012</year>) <volume>19</volume>:<fpage>219</fpage>&#x2013;<lpage>24</lpage>. doi: <pub-id pub-id-type="doi">10.1136/amiajnl-2011-000597</pub-id>, PMID: <pub-id pub-id-type="pmid">22249968</pub-id></citation></ref>
<ref id="ref15"><label>15.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Botsis</surname><given-names>T</given-names></name> <name><surname>Hartvigsen</surname><given-names>G</given-names></name> <name><surname>Chen</surname><given-names>F</given-names></name> <name><surname>Weng</surname><given-names>C</given-names></name></person-group>. <article-title>Secondary use of EHR: data quality issues and informatics opportunities</article-title>. <source>Summit Transl Bioinform</source>. (<year>2010</year>) <volume>2010</volume>:<fpage>1</fpage>&#x2013;<lpage>5</lpage>. PMID: <pub-id pub-id-type="pmid">21347133</pub-id></citation></ref>
<ref id="ref16"><label>16.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hripcsak</surname><given-names>G</given-names></name> <name><surname>Albers</surname><given-names>DJ</given-names></name></person-group>. <article-title>Next-generation phenotyping of electronic health records</article-title>. <source>J Am Med Inform Assoc</source>. (<year>2013</year>) <volume>20</volume>:<fpage>117</fpage>&#x2013;<lpage>21</lpage>. doi: <pub-id pub-id-type="doi">10.1136/amiajnl-2012-001145</pub-id>, PMID: <pub-id pub-id-type="pmid">22955496</pub-id></citation></ref>
<ref id="ref17"><label>17.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Prokosch</surname><given-names>HU</given-names></name> <name><surname>Ganslandt</surname><given-names>T</given-names></name></person-group>. <article-title>Perspectives for medical informatics. Reusing the electronic medical record for clinical research</article-title>. <source>Methods Inf Med</source>. (<year>2009</year>) <volume>48</volume>:<fpage>38</fpage>&#x2013;<lpage>44</lpage>. doi: <pub-id pub-id-type="doi">10.3414/ME9132</pub-id>, PMID: <pub-id pub-id-type="pmid">19151882</pub-id></citation></ref>
<ref id="ref18"><label>18.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Weiskopf</surname><given-names>NG</given-names></name> <name><surname>Hripcsak</surname><given-names>G</given-names></name> <name><surname>Swaminathan</surname><given-names>S</given-names></name> <name><surname>Weng</surname><given-names>C</given-names></name></person-group>. <article-title>Defining and measuring completeness of electronic health records for secondary use</article-title>. <source>J Biomed Inform</source>. (<year>2013</year>) <volume>46</volume>:<fpage>830</fpage>&#x2013;<lpage>6</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jbi.2013.06.010</pub-id>, PMID: <pub-id pub-id-type="pmid">23820016</pub-id></citation></ref>
<ref id="ref19"><label>19.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Weiskopf</surname><given-names>NG</given-names></name> <name><surname>Cohen</surname><given-names>AM</given-names></name> <name><surname>Hannan</surname><given-names>J</given-names></name> <name><surname>Jarmon</surname><given-names>T</given-names></name> <name><surname>Dorr</surname><given-names>DA</given-names></name></person-group>. <article-title>Towards augmenting structured EHR data: a comparison of manual chart review and patient self-report</article-title>. <source>AMIA Annu Symp Proc</source>. (<year>2019</year>) <volume>2019</volume>:<fpage>903</fpage>&#x2013;<lpage>12</lpage>.</citation></ref>
<ref id="ref20"><label>20.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kern</surname><given-names>LM</given-names></name> <name><surname>Malhotra</surname><given-names>S</given-names></name> <name><surname>Barr&#x00F3;n</surname><given-names>Y</given-names></name> <name><surname>Quaresimo</surname><given-names>J</given-names></name> <name><surname>Dhopeshwarkar</surname><given-names>R</given-names></name> <name><surname>Pichardo</surname><given-names>M</given-names></name> <etal/></person-group>. <article-title>Accuracy of electronically reported &#x201C;meaningful use&#x201D; clinical quality measures: a cross-sectional study</article-title>. <source>Ann Intern Med</source>. (<year>2013</year>) <volume>158</volume>:<fpage>77</fpage>&#x2013;<lpage>83</lpage>. doi: <pub-id pub-id-type="doi">10.7326/0003-4819-158-2-201301150-00001</pub-id>, PMID: <pub-id pub-id-type="pmid">23318309</pub-id></citation></ref>
<ref id="ref21"><label>21.</label> <citation citation-type="other"><person-group person-group-type="author"><collab id="coll2">The Society of Critical Care Medicine, Lyntek Medical Technologies Inc</collab></person-group>. <source>VIRUS COVID-19 registry dashboard: a COVID-19 registry of current ICU and hospital care patterns USA2020</source>. (<year>2021</year>). <comment>Available at: </comment><ext-link xlink:href="https://sccmcovid19.org/" ext-link-type="uri">https://sccmcovid19.org/</ext-link>.</citation></ref>
<ref id="ref22"><label>22.</label> <citation citation-type="journal"><person-group person-group-type="author"><collab id="coll3">General Assembly of the World Medical Association</collab></person-group>. <article-title>World medical association declaration of Helsinki: ethical principles for medical research involving human subjects</article-title>. <source>J Am Coll Dent</source>. (<year>2014</year>) <volume>81</volume>:<fpage>14</fpage>&#x2013;<lpage>8</lpage>.</citation></ref>
<ref id="ref23"><label>23.</label> <citation citation-type="other"><person-group person-group-type="author"><collab id="coll4">World Health Organization-International Severe Acute Respiratory and Emerging Infection Consortium (WHO-ISARIC)</collab></person-group>. <source>Clinical data collection &#x2013; the COVID-19 case report forms (CRFs)</source> (<year>2020</year>). <comment>Available at: </comment><ext-link xlink:href="https://media.tghn.org/medialibrary/2020/03/ISARIC_COVID-19_CRF_V1.3_24Feb2020.pdf" ext-link-type="uri">https://media.tghn.org/medialibrary/2020/03/ISARIC_COVID-19_CRF_V1.3_24Feb2020.pdf</ext-link></citation></ref>
<ref id="ref24"><label>24.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Harris</surname><given-names>PA</given-names></name> <name><surname>Taylor</surname><given-names>R</given-names></name> <name><surname>Thielke</surname><given-names>R</given-names></name> <name><surname>Payne</surname><given-names>J</given-names></name> <name><surname>Gonzalez</surname><given-names>N</given-names></name> <name><surname>Conde</surname><given-names>JG</given-names></name></person-group>. <article-title>Research electronic data capture (REDCap)&#x2014;a metadata-driven methodology and workflow process for providing translational research informatics support</article-title>. <source>J Biomed Inform</source>. (<year>2009</year>) <volume>42</volume>:<fpage>377</fpage>&#x2013;<lpage>81</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jbi.2008.08.010</pub-id>, PMID: <pub-id pub-id-type="pmid">18929686</pub-id></citation></ref>
<ref id="ref25"><label>25.</label> <citation citation-type="book"><person-group person-group-type="author"><name><surname>Wang</surname><given-names>J</given-names></name></person-group>. <article-title>Pearson correlation coefficient</article-title> In: <person-group person-group-type="editor"><name><surname>Dubitzky</surname> <given-names>W</given-names></name> <name><surname>Wolkenhauer</surname> <given-names>O</given-names></name> <name><surname>Cho</surname> <given-names>K-H</given-names></name> <name><surname>Yokota</surname> <given-names>H</given-names></name></person-group>, editors. <source>Encyclopedia of systems biology</source>. <publisher-loc>New York, NY</publisher-loc>: <publisher-name>Springer</publisher-name> (<year>2013</year>). <fpage>1671</fpage>.</citation></ref>
<ref id="ref26"><label>26.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mukaka</surname><given-names>MM</given-names></name></person-group>. <article-title>Statistics corner: a guide to appropriate use of correlation coefficient in medical research</article-title>. <source>Malawi Med J</source>. (<year>2012</year>) <volume>24</volume>:<fpage>69</fpage>&#x2013;<lpage>71</lpage>. PMID: <pub-id pub-id-type="pmid">23638278</pub-id></citation></ref>
<ref id="ref27"><label>27.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Altman</surname><given-names>DG</given-names></name> <name><surname>Bland</surname><given-names>JM</given-names></name></person-group>. <article-title>Measurement in Medicine - the analysis of method comparison studies</article-title>. <source>J Roy Stat Soc D-Sta</source>. (<year>1983</year>) <volume>32</volume>:<fpage>307</fpage>&#x2013;<lpage>17</lpage>. doi: <pub-id pub-id-type="doi">10.2307/2987937</pub-id></citation></ref>
<ref id="ref28"><label>28.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sun</surname><given-names>S</given-names></name></person-group>. <article-title>Meta-analysis of Cohen&#x2019;s kappa</article-title>. <source>Health Serv Outc Res Methodol</source>. (<year>2011</year>) <volume>11</volume>:<fpage>145</fpage>&#x2013;<lpage>63</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s10742-011-0077-3</pub-id></citation></ref>
<ref id="ref29"><label>29.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Landis</surname><given-names>JR</given-names></name> <name><surname>Koch</surname><given-names>GG</given-names></name></person-group>. <article-title>The measurement of observer agreement for categorical data</article-title>. <source>Biometrics</source>. (<year>1977</year>) <volume>33</volume>:<fpage>159</fpage>&#x2013;<lpage>74</lpage>. doi: <pub-id pub-id-type="doi">10.2307/2529310</pub-id></citation></ref>
<ref id="ref30"><label>30.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Alsara</surname><given-names>A</given-names></name> <name><surname>Warner</surname><given-names>DO</given-names></name> <name><surname>Li</surname><given-names>G</given-names></name> <name><surname>Herasevich</surname><given-names>V</given-names></name> <name><surname>Gajic</surname><given-names>O</given-names></name> <name><surname>Kor</surname><given-names>DJ</given-names></name></person-group>. <article-title>Derivation and validation of automated electronic search strategies to identify pertinent risk factors for postoperative acute lung injury</article-title>. <source>Mayo Clin Proc</source>. (<year>2011</year>) <volume>86</volume>:<fpage>382</fpage>&#x2013;<lpage>8</lpage>. doi: <pub-id pub-id-type="doi">10.4065/mcp.2010.0802</pub-id>, PMID: <pub-id pub-id-type="pmid">21531881</pub-id></citation></ref>
<ref id="ref31"><label>31.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Singh</surname><given-names>B</given-names></name> <name><surname>Singh</surname><given-names>A</given-names></name> <name><surname>Ahmed</surname><given-names>A</given-names></name> <name><surname>Wilson</surname><given-names>GA</given-names></name> <name><surname>Pickering</surname><given-names>BW</given-names></name> <name><surname>Herasevich</surname><given-names>V</given-names></name> <etal/></person-group>. <article-title>Derivation and validation of automated electronic search strategies to extract Charlson comorbidities from electronic medical records</article-title>. <source>Mayo Clin Proc</source>. (<year>2012</year>) <volume>87</volume>:<fpage>817</fpage>&#x2013;<lpage>24</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.mayocp.2012.04.015</pub-id>, PMID: <pub-id pub-id-type="pmid">22958988</pub-id></citation></ref>
<ref id="ref32"><label>32.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rishi</surname><given-names>MA</given-names></name> <name><surname>Kashyap</surname><given-names>R</given-names></name> <name><surname>Wilson</surname><given-names>G</given-names></name> <name><surname>Hocker</surname><given-names>S</given-names></name></person-group>. <article-title>Retrospective derivation and validation of a search algorithm to identify extubation failure in the intensive care unit</article-title>. <source>BMC Anesthesiol</source>. (<year>2014</year>) <volume>14</volume>:<fpage>41</fpage>. doi: <pub-id pub-id-type="doi">10.1186/1471-2253-14-41</pub-id>, PMID: <pub-id pub-id-type="pmid">24891838</pub-id></citation></ref>
<ref id="ref33"><label>33.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Smischney</surname><given-names>NJ</given-names></name> <name><surname>Velagapudi</surname><given-names>VM</given-names></name> <name><surname>Onigkeit</surname><given-names>JA</given-names></name> <name><surname>Pickering</surname><given-names>BW</given-names></name> <name><surname>Herasevich</surname><given-names>V</given-names></name> <name><surname>Kashyap</surname><given-names>R</given-names></name></person-group>. <article-title>Retrospective derivation and validation of a search algorithm to identify emergent endotracheal intubations in the intensive care unit</article-title>. <source>Appl Clin Inform</source>. (<year>2013</year>) <volume>4</volume>:<fpage>419</fpage>&#x2013;<lpage>27</lpage>. doi: <pub-id pub-id-type="doi">10.4338/ACI-2013-05-RA-0033</pub-id>, PMID: <pub-id pub-id-type="pmid">24155793</pub-id></citation></ref>
<ref id="ref34"><label>34.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Schaefer</surname><given-names>JW</given-names></name> <name><surname>Riley</surname><given-names>JM</given-names></name> <name><surname>Li</surname><given-names>M</given-names></name> <name><surname>Cheney-Peters</surname><given-names>DR</given-names></name> <name><surname>Venkataraman</surname><given-names>CM</given-names></name> <name><surname>Li</surname><given-names>CJ</given-names></name> <etal/></person-group>. <article-title>Comparing reliability of ICD-10-based COVID-19 comorbidity data to manual chart review, a retrospective cross-sectional study</article-title>. <source>J Med Virol</source>. (<year>2022</year>) <volume>94</volume>:<fpage>1550</fpage>&#x2013;<lpage>7</lpage>. doi: <pub-id pub-id-type="doi">10.1002/jmv.27492</pub-id>, PMID: <pub-id pub-id-type="pmid">34850420</pub-id></citation></ref>
<ref id="ref35"><label>35.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Brazeal</surname><given-names>JG</given-names></name> <name><surname>Alekseyenko</surname><given-names>AV</given-names></name> <name><surname>Li</surname><given-names>H</given-names></name> <name><surname>Fugal</surname><given-names>M</given-names></name> <name><surname>Kirchoff</surname><given-names>K</given-names></name> <name><surname>Marsh</surname><given-names>C</given-names></name> <etal/></person-group>. <article-title>Assessing quality and agreement of structured data in automatic versus manual abstraction of the electronic health record for a clinical epidemiology study</article-title>. <source>Res Methods Med Health Sci</source>. (<year>2021</year>) <volume>2</volume>:<fpage>168</fpage>&#x2013;<lpage>78</lpage>. doi: <pub-id pub-id-type="doi">10.1177/26320843211061287</pub-id></citation></ref>
<ref id="ref36"><label>36.</label> <citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wu</surname><given-names>L</given-names></name> <name><surname>Ashton</surname><given-names>CM</given-names></name></person-group>. <article-title>Chart review. A need for reappraisal</article-title>. <source>Eval Health Prof</source>. (<year>1997</year>) <volume>20</volume>:<fpage>146</fpage>&#x2013;<lpage>63</lpage>. doi: <pub-id pub-id-type="doi">10.1177/016327879702000203</pub-id></citation></ref>
</ref-list>
<glossary>
<def-list>
<title>Abbreviations</title>
<def-item>
<term>CAD</term>
<def>
<p>Coronary artery disease</p>
</def>
</def-item>
<def-item>
<term>CHF</term>
<def>
<p>Congestive heart failure</p>
</def>
</def-item>
<def-item>
<term>CI</term>
<def>
<p>Confidence interval</p>
</def>
</def-item>
<def-item>
<term>CKD</term>
<def>
<p>Chronic kidney disease</p>
</def>
</def-item>
<def-item>
<term>COPD</term>
<def>
<p>Chronic obstructive pulmonary disease</p>
</def>
</def-item>
<def-item>
<term>CRF</term>
<def>
<p>Case report forms</p>
</def>
</def-item>
<def-item>
<term>DM</term>
<def>
<p>Diabetes mellitus</p>
</def>
</def-item>
<def-item>
<term>ECMO</term>
<def>
<p>Extracorporeal membrane oxygenation</p>
</def>
</def-item>
<def-item>
<term>EHR</term>
<def>
<p>Electronic health records</p>
</def>
</def-item>
<def-item>
<term>HFNC</term>
<def>
<p>High flow nasal cannula</p>
</def>
</def-item>
<def-item>
<term>HTN</term>
<def>
<p>Hypertension</p>
</def>
</def-item>
<def-item>
<term>ICU</term>
<def>
<p>Intensive care unit</p>
</def>
</def-item>
<def-item>
<term>IMV</term>
<def>
<p>Invasive mechanical ventilation</p>
</def>
</def-item>
<def-item>
<term>IRB</term>
<def>
<p>Institutional review boards</p>
</def>
</def-item>
<def-item>
<term>LOS</term>
<def>
<p>Length of stay</p>
</def>
</def-item>
<def-item>
<term>NIMV</term>
<def>
<p>Non-invasive mechanical ventilation</p>
</def>
</def-item>
<def-item>
<term>PCC</term>
<def>
<p>Pearson interclass correlation coefficient</p>
</def>
</def-item>
<def-item>
<term>REDCap</term>
<def>
<p>Research electronic data capture software</p>
</def>
</def-item>
<def-item>
<term>SCCM</term>
<def>
<p>Society of Critical Care Medicine</p>
</def>
</def-item>
<def-item>
<term>SD</term>
<def>
<p>Standard deviations</p>
</def>
</def-item>
<def-item>
<term>SE</term>
<def>
<p>Standard error</p>
</def>
</def-item>
<def-item>
<term>SFTP</term>
<def>
<p>Secure file transfer platform</p>
</def>
</def-item>
<def-item>
<term>SOP</term>
<def>
<p>Standard operating procedure</p>
</def>
</def-item>
<def-item>
<term>SQL</term>
<def>
<p>Structured query language</p>
</def>
</def-item>
<def-item>
<term>VIRUS</term>
<def>
<p>Viral Infection and Respiratory Illness Universal Study</p>
</def>
</def-item>
<def-item>
<term>VIRUS-PEEP</term>
<def>
<p>VIRUS Practical EHR Export Pathways group</p>
</def>
</def-item>
<def-item>
<term>WHO</term>
<def>
<p>World Health Organization</p>
</def>
</def-item>
<def-item>
<term>WHO-ISARIC</term>
<def>
<p>World Health Organization-International Severe Acute Respiratory and Emerging Infection Consortium</p>
</def>
</def-item>
</def-list>
</glossary>
</back>
</article>