<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.2 20190208//EN" "http://jats.nlm.nih.gov/publishing/1.2/JATS-journalpublishing1.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="1.2" xml:lang="en">
    <front>
        <journal-meta>
            <journal-id journal-id-type="pmc">F1000Research</journal-id>
            <journal-title-group>
                <journal-title>F1000Research</journal-title>
            </journal-title-group>
            <issn pub-type="epub">2046-1402</issn>
            <publisher>
                <publisher-name>F1000 Research Limited</publisher-name>
                <publisher-loc>London, UK</publisher-loc>
            </publisher>
        </journal-meta>
        <article-meta>
            <article-id pub-id-type="doi">10.12688/f1000research.160046.1</article-id>
            <article-categories>
                <subj-group subj-group-type="heading">
                    <subject>Research Article</subject>
                </subj-group>
                <subj-group>
                    <subject>Articles</subject>
                </subj-group>
            </article-categories>
            <title-group>
                <article-title>Research data volume and quality derived from a specialist disease registry versus routine electronic health records</article-title>
                <fn-group content-type="pub-status">
                    <fn>
                        <p>[version 1; peer review: awaiting peer review]</p>
                    </fn>
                </fn-group>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Hamilton</surname>
                        <given-names>Roseanna</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Data Curation</role>
                    <role content-type="http://credit.niso.org/">Formal Analysis</role>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <role content-type="http://credit.niso.org/">Project Administration</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Original Draft Preparation</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a1">1</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Varakliotis</surname>
                        <given-names>Socrates</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Data Curation</role>
                    <role content-type="http://credit.niso.org/">Formal Analysis</role>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Original Draft Preparation</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <uri content-type="orcid">https://orcid.org/0000-0002-5265-8205</uri>
                    <xref ref-type="aff" rid="a1">1</xref>
                    <xref ref-type="aff" rid="a2">2</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Cancemi</surname>
                        <given-names>Dario</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Data Curation</role>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Project Administration</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Original Draft Preparation</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a1">1</xref>
                    <xref ref-type="aff" rid="a2">2</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Spiridou</surname>
                        <given-names>Anastasia</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Data Curation</role>
                    <role content-type="http://credit.niso.org/">Formal Analysis</role>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <role content-type="http://credit.niso.org/">Project Administration</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Original Draft Preparation</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <uri content-type="orcid">https://orcid.org/0000-0001-6576-0244</uri>
                    <xref ref-type="aff" rid="a1">1</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Shah</surname>
                        <given-names>Mohsin</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Data Curation</role>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <role content-type="http://credit.niso.org/">Project Administration</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Original Draft Preparation</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a1">1</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Key</surname>
                        <given-names>Daniel</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Data Curation</role>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <role content-type="http://credit.niso.org/">Project Administration</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Original Draft Preparation</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a1">1</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Wedderburn</surname>
                        <given-names>Lucy R</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Conceptualization</role>
                    <role content-type="http://credit.niso.org/">Data Curation</role>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Project Administration</role>
                    <role content-type="http://credit.niso.org/">Supervision</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Original Draft Preparation</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a1">1</xref>
                    <xref ref-type="aff" rid="a2">2</xref>
                </contrib>
                <contrib contrib-type="author" corresp="yes">
                    <name>
                        <surname>Sebire</surname>
                        <given-names>Neil James</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Conceptualization</role>
                    <role content-type="http://credit.niso.org/">Formal Analysis</role>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Supervision</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Original Draft Preparation</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <uri content-type="orcid">https://orcid.org/0000-0001-5348-9063</uri>
                    <xref ref-type="corresp" rid="c1">a</xref>
                    <xref ref-type="aff" rid="a1">1</xref>
                    <xref ref-type="aff" rid="a2">2</xref>
                </contrib>
                <aff id="a1">
                    <label>1</label>NIHR Great Ormond Street Hospital Biomedical Research Centre, London, England, UK</aff>
                <aff id="a2">
                    <label>2</label>University College London Institute of Child Health, London, England, UK</aff>
            </contrib-group>
            <author-notes>
                <corresp id="c1">
                    <label>a</label>
                    <email xlink:href="mailto:n.sebire@ucl.ac.uk">n.sebire@ucl.ac.uk</email>
                </corresp>
                <fn fn-type="conflict">
                    <p>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>27</day>
                <month>1</month>
                <year>2025</year>
            </pub-date>
            <pub-date pub-type="collection">
                <year>2025</year>
            </pub-date>
            <volume>14</volume>
            <elocation-id>132</elocation-id>
            <history>
                <date date-type="accepted">
                    <day>8</day>
                    <month>1</month>
                    <year>2025</year>
                </date>
            </history>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2025 Hamilton R et al.</copyright-statement>
                <copyright-year>2025</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access article distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <self-uri content-type="pdf" xlink:href="https://f1000research.com/articles/14-132/pdf"/>
            <abstract>
                <sec>
                    <title>Objective</title>
                    <p>This study aims to compare data availability and analytic results for patients using matched data items from a dedicated disease registry versus data extracted directly from an electronic patient record (EPR) system and a trusted research environment (TRE).</p>
                </sec>
                <sec>
                    <title>Methods</title>
                    <p>Data from patients enrolled in the National JDM Cohort and Biomarker Study (JDCBS) were compared with routine EPR data from the same patients attending a specialist children&#x2019;s hospital between 2019 and 2021. Data from both sources were extracted, de-identified, and analysed within a trusted research environment adhering to NHS security standards. Descriptive statistics, visualizations, and statistical comparisons were performed.</p>
                </sec>
                <sec>
                    <title>Results</title>
                    <p>Of the 688 registry patients in total, 270 attended one specialist hospital with EPR data available. The EPR system yielded 328,527 data points on these patients compared to 40,673 from the registry, including 2-10 fold more data items across data categories. Diagnoses were more numerous in the EPR data, while registry data captured more comprehensive medication records. Laboratory test results were 10 times more frequent in EPR data, including a broader range of test types. Despite higher data volume in EPR, the clinical significance of the additional data points remains uncertain.</p>
                </sec>
                <sec>
                    <title>Conclusion</title>
                    <p>Routine EPR data can effectively replicate much disease registry data with a larger volume of data points, potentially offering additional analytical possibilities. However, specific targeted registry data collection remains valuable for certain data elements. A hybrid approach, utilizing both routine EPR data and focused registry collection, could optimise healthcare research by reducing costs and avoiding duplication.</p>
                </sec>
            </abstract>
            <kwd-group kwd-group-type="author">
                <kwd>EPR data</kwd>
                <kwd>registry data</kwd>
                <kwd>data quality</kwd>
                <kwd>healthcare research</kwd>
                <kwd>trusted research environment</kwd>
            </kwd-group>
            <funding-group>
                <award-group id="fund-1">
                    <funding-source>Myositis UK Charity</funding-source>
                </award-group>
                <award-group id="fund-2" xlink:href="https://doi.org/10.13039/501100000265">
                    <funding-source>Medical Research Council</funding-source>
                    <award-id>MR/N003322/1</award-id>
                </award-group>
                <award-group id="fund-3" xlink:href="https://doi.org/10.13039/100010269">
                    <funding-source>Wellcome Trust UK</funding-source>
                    <award-id>085860</award-id>
                </award-group>
                <award-group id="fund-4" xlink:href="https://doi.org/10.13039/501100001279">
                    <funding-source>Henry Smith Charity and Great Ormond Street Children's Charity</funding-source>
                    <award-id>V1268</award-id>
                </award-group>
                <award-group id="fund-5">
                    <funding-source>Cure JM</funding-source>
                    <award-id>GOSH042019</award-id>
                </award-group>
                <award-group id="fund-6">
                    <funding-source>Remission Charity</funding-source>
                </award-group>
                <award-group id="fund-7" xlink:href="https://doi.org/10.13039/100015391">
                    <funding-source>Myositis Association</funding-source>
                </award-group>
                <award-group id="fund-8" xlink:href="https://doi.org/10.13039/501100019256">
                    <funding-source>NIHR Great Ormond Street Hospital Biomedical Research Centre</funding-source>
                </award-group>
                <award-group id="fund-9" xlink:href="https://doi.org/10.13039/501100000317">
                    <funding-source>Action Medical Research UK</funding-source>
                    <award-id>SP4252</award-id>
                </award-group>
                <award-group id="fund-10">
                    <funding-source>Tiny Hearts Society</funding-source>
                </award-group>
                <award-group id="fund-11" xlink:href="https://doi.org/10.13039/501100012041">
                    <funding-source>Versus Arthritis</funding-source>
                    <award-id>14518</award-id>
                    <award-id>20164</award-id>
                    <award-id>21593</award-id>
                </award-group>
                <award-group id="fund-12">
                    <funding-source>Cathal Hayes Research Foundation</funding-source>
                </award-group>
                <funding-statement>Funding for the UK JDM Cohort and Biomarker study (JDCBS) has been provided by grants from the Cathal Hayes Research Foundation;
the Wellcome Trust UK [085860]; Action Medical Research UK [SP4252]; the Myositis UK Charity, Arthritis Research UK now Versus Arthritis [14518, 20164, 21593]; the Henry Smith Charity and Great Ormond Street Children's Charity [V1268]; Tiny Hearts Society, The Myositis Association, Remission Charity, Cure JM (GOSH042019), the Medical Research Council [MR/N003322/1], and infrastructure through the National Institute for Health Research (NIHR) via the NIHR-Biomedical Research Centre at GOSH and GOSHCC.
The views expressed are those of the authors and not necessarily those of the NHS, the NIHR or the Department of Health and Social Care.</funding-statement>
                <funding-statement>
                    <italic>The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript.</italic>
                </funding-statement>
            </funding-group>
        </article-meta>
    </front>
    <body>
        <sec id="sec5" sec-type="intro">
            <title>Introduction</title>
            <p>Traditionally, research studies use specifically collected data since it has been reported that historically there may be quality issues with using routine EPR data and manual validation may be required at organisational level to make such data meaningful.
                <sup>
                    <xref ref-type="bibr" rid="ref1">1</xref>
                </sup> Several dimensions of study data quality are generally described such as completeness, accuracy, concordance, plausibility, all of which have been variably applied to routine EPR data for research
                <sup>
                    <xref ref-type="bibr" rid="ref2">2</xref>
                </sup> and corresponding EPR data quality assessment frameworks have been proposed.
                <sup>
                    <xref ref-type="bibr" rid="ref3">3</xref>,
                    <xref ref-type="bibr" rid="ref4">4</xref>
                </sup>
            </p>
            <p>However, previous studies have reported the feasibility of using routine EPR data to determine quality of care in various settings, such as congenital heart disease, with reasonable data availability, although reporting reduced reliability of billing codes for identification of certain specific conditions, especially those that may be rare.
                <sup>
                    <xref ref-type="bibr" rid="ref5">5</xref>
                </sup> In addition, the use of routine EPR data to generate and populate disease-specific registries has been described (in this context, registries are regarded as lists of patients and associated data items for individuals who either share a common diagnosis, procedure or treatment).
                <sup>
                    <xref ref-type="bibr" rid="ref6">6</xref>
                </sup>
            </p>
            <p>Despite assumptions regarding EPR systems, published evidence suggests that data quality in studies using routine EPR data is acceptable. In one study directly comparing quality of cancer registry data versus the same data derived directly (but manually extracted) from EPR systems, there was 95% concordance for most features including important elements such as primary site, laterality and histologic type.
                <sup>
                    <xref ref-type="bibr" rid="ref7">7</xref>
                </sup> Another study directly compared manual and electronic data collection from a critical care EPR system and reported that the EPR derived data from over 241,000 patients undergoing more than 400,000 surgical procedures was good quality; for example, only around 1% had missing race/ethnicity data, all cases had an associated procedure code and 84% had outpatient medication recorded.
                <sup>
                    <xref ref-type="bibr" rid="ref8">8</xref>
                </sup> In another study, data were extracted from specific fields from a sample of around 200 patients admitted to adult intensive care units, either via manual study specific collection or extracted directly from the EPR system; concordance was high with full agreement for 11/30 variables (35%) and median Kappa score for categorical variables of 0.99 (IQR 0.92-1.00). Interestingly, in this study, where discordancy was present, manual transcription errors were the most common source of discrepancies.
                <sup>
                    <xref ref-type="bibr" rid="ref9">9</xref>
                </sup> Whilst routine extracted data therefore shows good scores for dimensions such as consistency, completeness, and uniqueness, there may be apparent &#x2018;missing&#x2019; routine data, which is mainly related to the different levels of granularity required for secondary purposes compared to clinical coding.
                <sup>
                    <xref ref-type="bibr" rid="ref10">10</xref>
                </sup> In addition, EPR data quality is often variable specific. For example, in a study of hypertension surveillance, blood pressure measurements and medications were well-recorded but other elements such as smoking or alcohol status were often missing or incomplete,
                <sup>
                    <xref ref-type="bibr" rid="ref11">11</xref>
                </sup> hence the need for assessment of EPR data quality for specific purposes.
                <sup>
                    <xref ref-type="bibr" rid="ref12">12</xref>
                </sup>
            </p>
            <p>Registry data is generally regarded as good quality. A review of paediatric cardiac surgery registries using &gt;50,000 data elements in around 500 subjects reported that only 3% of data elements were missing, with 98% accuracy of recording.
                <sup>
                    <xref ref-type="bibr" rid="ref13">13</xref>
                </sup> However, it should be noted that even registries may have data quality issues. One retrospective chart review study of around 400 medical records from 14 hospitals compared to matched registry data reported only 80-90% accuracy for surgery type, chemotherapy and radiotherapy for a range of disorders, with accuracy related to the experience of those extracting the data.
                <sup>
                    <xref ref-type="bibr" rid="ref14">14</xref>
                </sup>
            </p>
            <p>There is therefore recent interest in more widespread use of routine EPR data for clinical trials and surveillance, partly since this approach would be cheaper and quicker than conducting dedicated trials and studies, but also since real-world effects can be estimated from such data, which may be important. A meta-analysis of 84 studies using routine data and 463 traditional trials reported that routine data studies demonstrate around 20% less favourable treatment effects compared to formal trials for the same conditions across a range of outcomes.
                <sup>
                    <xref ref-type="bibr" rid="ref15">15</xref>
                </sup> The aim of this study is therefore to directly compare data availability and analytic results for the same patients using matched data items extracted from a dedicated existing registry versus data extracted directly from an EPR system and trusted research environment.</p>
        </sec>
        <sec id="sec6" sec-type="methods">
            <title>Methods</title>
            <p>The National JDM Cohort and Biomarker Study (JDCBS) is a voluntary cohort study: at the time of analysis JDCBS included data from 688 patients over a 20 year period, for which patients and families consent to the storage and use of their data and biosamples for secondary medical research purposes.
                <sup>
                    <xref ref-type="bibr" rid="ref16">16</xref>
                </sup> For the purposes of simplicity for the current analysis the JDCBS will simply be referred to as the &#x2018;registry&#x2019; to distinguish this dataset from the EPR derived dataset from the same patients. Patients in the JDCBS attending GOSH were identified and data extracted into a secure data environment (GOSH DRE). Routine EPR data from the same patients were also directly extracted from the EPR system (Epic), linked and all data deidentified and stored in the secure data environment for analysis. The GOSH DRE is a trusted research environment (TRE) meeting NHS security and ICT standards including ISO27001 and ISO27010 and the architecture and routine deidentified extracted EPR data constitute an HRA REC approved research database.</p>
            <p>Data was extracted and provisioned by the GOSH data steward team and only the non-identifiable linked data made available through a secure workspace for subsequent analysis by the research team using R. Descriptive statistics and visualisations were carried out on both datasets and statistical comparison performed by comparison of proportions and Mann-Whitney U tests as appropriate for discrete and continuous variables.</p>
        </sec>
        <sec id="sec7" sec-type="results">
            <title>Results</title>
            <p>Of a total of 688 patients registered overall in JDCBS there were 286 patients who had been managed at GOSH of whom EPR data from the study period 2019-2021 inclusive was available for 270. Total data points available for these patients for the categories of laboratory test results, medications, diagnoses and visits were 40,673 in the registry and 328,527 from EPR, 8-fold more data items over the same period. The data volumes varied by data type but 2-10 fold more data items were available for the same categories using routine extracted EPR data (
                <xref ref-type="table" rid="T1">
Table 1</xref>).</p>
            <table-wrap id="T1" orientation="portrait" position="float">
                <label>
Table 1. </label>
                <caption>
                    <title>Number of data items per category from GOSH patients included in the registry based on registry data and routinely extracted EPR data for the same patient group over the same time period.</title>
                </caption>
                <table content-type="article-table" frame="hsides">
                    <thead>
                        <tr>
                            <th align="left" colspan="1" rowspan="1" valign="top">Characteristic</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">JDCBS</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">
DRE</th>
                        </tr>
                    </thead>
                    <tbody>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Laboratory tests</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">27464</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">284150</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Medications</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">9532</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">34868</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Diagnoses</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">268</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">2772</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Visits</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">3409</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">6777</td>
                        </tr>
                    </tbody>
                </table>
            </table-wrap>
            <p>For some categories, such as Diagnoses, there were significantly more diagnoses recorded from the routine EPR data versus registry data, but this is likely a result of only targeted registry data collection in addition to the recording of many non-specific &#x2018;diagnoses&#x2019; within EPR coded data (
                <xref ref-type="table" rid="T2">
Table 2</xref>/
                <xref ref-type="fig" rid="f1">
Figure 1</xref>).</p>
            <table-wrap id="T2" orientation="portrait" position="float">
                <label>
Table 2. </label>
                <caption>
                    <title>Median and mean number of &#x2018;diagnoses&#x2019; recorded per patient in EPR DRE versus JDCBS registry for the same patient group over the same time period.</title>
                </caption>
                <table content-type="article-table" frame="hsides">
                    <thead>
                        <tr>
                            <th align="left" colspan="1" rowspan="1" valign="top">Patients</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">DRE (N=270)</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">
JDCBS (N=286)</th>
                        </tr>
                    </thead>
                    <tbody>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Total diagnoses (N)</td>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Minimum</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Median (IQR)</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">6 (3-14)</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">1 (1-1)</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Mean (SD)</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">10.27&#x00b1;12.01</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.94&#x00b1;0.24</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Maximum</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">68</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">1</td>
                        </tr>
                    </tbody>
                </table>
            </table-wrap>
            <fig fig-type="figure" id="f1" orientation="portrait" position="float">
                <label>
Figure 1. </label>
                <caption>
                    <title>Chart of number of distinct diagnoses recorded from routine EPR and registry data.</title>
                    <p>The number of distinct diagnoses recorded from routine EPR data is more than 50x greater than the number of diagnoses recorded in the registry. However, examination of the most common diagnoses provided in each demonstrates that registry diagnoses only include those high-level diagnoses directly related to the primary medical condition, whereas EPR data additionally includes comorbidities and other conditions.</p>
                </caption>
                <graphic id="gr1" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/175857/d1db5792-5f76-4dca-9a24-4f8afeb3efd3_figure1.gif"/>
            </fig>
            <p>In contrast, the median number of medications recorded per patient is lower in the routine DRE data than in the registry data (median 16 versus median 26 respectively), likely explained by the fact that GOSH EPR data only includes medications prescribed by the hospital whereas registry data may have included all medications used regardless of whether prescribed in other hospitals or primary care as well as GOSH (
                <xref ref-type="table" rid="T3">
Table 3</xref>).</p>
            <table-wrap id="T3" orientation="portrait" position="float">
                <label>
Table 3. </label>
                <caption>
                    <title>Median and mean number of medications per patient in EPR DRE data and JDCBS registry data for the same patient group over the same time period.</title>
                </caption>
                <table content-type="article-table" frame="hsides">
                    <thead>
                        <tr>
                            <th align="left" colspan="1" rowspan="1" valign="top"/>
                            <th align="left" colspan="1" rowspan="1" valign="top">DRE (N=270)</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">
JDCBS (N=286)</th>
                        </tr>
                    </thead>
                    <tbody>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Total medications</td>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Minimum</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Median (IQR)</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">16.00 (0.00, 99.50)</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">26.00 (8.00, 49.00)</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Mean (SD)</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">129.14 (&#x00b1;477.70)</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">33.33 (&#x00b1;30.96)</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Maximum</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">7200</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">178</td>
                        </tr>
                    </tbody>
                </table>
            </table-wrap>
            <p>The category with the greatest fold difference in data items was, however, laboratory testing, with 10-fold more laboratory test results available per patient in the EPR derived dataset compared to the registry data, likely a consequence of recording of only selected laboratory tests within the registry (
                <xref ref-type="table" rid="T4">
Table 4</xref>).</p>
            <table-wrap id="T4" orientation="portrait" position="float">
                <label>
Table 4. </label>
                <caption>
                    <title>Number of laboratory test results available per patient from the EPR DRE dataset and the JDCBS registry data for the same patient group over the same time period, demonstrating many-fold more laboratory results in the routine EPR data.</title>
                </caption>
                <table content-type="article-table" frame="hsides">
                    <thead>
                        <tr>
                            <th align="left" colspan="1" rowspan="1" valign="top"/>
                            <th align="left" colspan="1" rowspan="1" valign="top">DRE (N=270)</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">
JDCBS (N=286)</th>
                        </tr>
                    </thead>
                    <tbody>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Total laboratory tests</td>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Minimum</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Median (IQR)</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">830 (432-1364)</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">74 (21-152)</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Mean (SD)</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">1052.4&#x00b1;875.0</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">96.0&#x00b1;89.4</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Maximum</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">5911</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">523</td>
                        </tr>
                    </tbody>
                </table>
            </table-wrap>
            <p>Further examination of the JDCBS and EPR DRE data laboratory test result types demonstrates a broadly similar pattern of testing with a marked predominance of repeated standard tests, specifically tests such as full blood count (
                <xref ref-type="fig" rid="f2">
Figure 2</xref>). However, the overall number of distinct laboratory tests recorded in the registry was 34 compared to &gt;1300 laboratory test types overall in routine data, likely a consequence of registry data collection of only specific predefined tests (
                <xref ref-type="fig" rid="f3">
Figure 3</xref>).</p>
            <fig fig-type="figure" id="f2" orientation="portrait" position="float">
                <label>
Figure 2. </label>
                <caption>
                    <title>Bar chart of most common laboratory tests.</title>
                    <p>The 20 most common laboratory test types available in the JDCBS registry (Top) and the EPR DRE data (Bottom) are provided, demonstrating broadly similar patterns of relative test frequencies despite around 10-fold more test results available through the EPR data.</p>
                </caption>
                <graphic id="gr2" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/175857/d1db5792-5f76-4dca-9a24-4f8afeb3efd3_figure2.gif"/>
            </fig>
            <fig fig-type="figure" id="f3" orientation="portrait" position="float">
                <label>
Figure 3. </label>
                <caption>
                    <title>Chart of number and types of unique laboratory test types in the JDCBS and EPR DRE datasets.</title>
                </caption>
                <graphic id="gr3" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/175857/d1db5792-5f76-4dca-9a24-4f8afeb3efd3_figure3.gif"/>
            </fig>
            <p>For test types present in both datasets, significantly more values were available from the routine EPR DRE data, resulting in small differences in overall result distributions of uncertain clinical significance (
                <xref ref-type="fig" rid="f4">
Figure 4</xref>).</p>
            <fig fig-type="figure" id="f4" orientation="portrait" position="float">
                <label>
Figure 4. </label>
                <caption>
                    <title>Examples of small differences in distributions of laboratory test values between JDCBS registry data and routine EPR DRE data.</title>
                    <p>Box whisker plots illustrating median, IQR and ranges for serum albumin (top) and serum LDH (bottom) from both datasets showing small differences in distribution of values.</p>
                </caption>
                <graphic id="gr4" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/175857/d1db5792-5f76-4dca-9a24-4f8afeb3efd3_figure4.gif"/>
            </fig>
            <p>However, the presence of orders of magnitude more data items in the EPR DRE data allows potential additional analysis types to be carried out. For example, there is a relationship between the total number of laboratory tests performed and total number of EPR diagnoses recorded per patient (
                <xref ref-type="fig" rid="f5">
Figure 5</xref>).</p>
            <fig fig-type="figure" id="f5" orientation="portrait" position="float">
                <label>
Figure 5. </label>
                <caption>
                    <title>Relationship between total number of laboratory tests performed and total number of EPR diagnoses.</title>
                    <p>Using EPR DRE data per patient.</p>
                </caption>
                <graphic id="gr5" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/175857/d1db5792-5f76-4dca-9a24-4f8afeb3efd3_figure5.gif"/>
            </fig>
            <p>Finally, since registry data only includes selected attendances, the average number of outpatient visits recorded per patient is more than twice as high in routine EPR data as in the registry (
                <xref ref-type="table" rid="T5">
Table 5</xref>).</p>
            <table-wrap id="T5" orientation="portrait" position="float">
                <label>
Table 5. </label>
                <caption>
                    <title>Average number (median, mean) of hospital attendances per patient during the same time period from EPR DRE and JDCBS registry.</title>
                </caption>
                <table content-type="article-table" frame="hsides">
                    <thead>
                        <tr>
                            <th align="left" colspan="1" rowspan="1" valign="top"/>
                            <th align="left" colspan="1" rowspan="1" valign="top">DRE (N=270)</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">
JDCBS (N=286)</th>
                        </tr>
                    </thead>
                    <tbody>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Total outpatient visits</td>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Minimum</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">1</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Median (IQR)</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">22.00 (13.00, 30.75)</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">10.00 (4.00, 18.00)</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Mean (SD)</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">25.10&#x00b1;22.7</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">11.92&#x00b1;9.17</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Maximum</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">254</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">48</td>
                        </tr>
                    </tbody>
                </table>
            </table-wrap>
        </sec>
        <sec id="sec8" sec-type="discussion">
            <title>Discussion</title>
            <p>The findings of this study have demonstrated that, firstly, it is possible to use extracted routine electronic health record data to generate a dataset that recapitulates many aspects of data found in a dedicated registry. Second, there are orders of magnitude more data points available from use of routine EPR data, including data elements which may be of interest or use but were not initially considered or appreciated when setting up the registry, especially for elements such as laboratory test results. Third, use of all data points, such as from all laboratory tests performed, may demonstrate small but significant differences in test result distributions indicating that registry data may not represent unselected routine clinical data, although, in general, distributions were similar and any differences were of uncertain significance. Fourth, additional analyses may be possible using more extensive routine EPR data due to ease of linkage regarding time points and data point interrelationships.</p>
            <p>However, despite the additional volume of data available from routine EPR extractions, it remains uncertain whether this provides significant additional clinical or research insight, since the most common data items are repeat testing of common standard tests and it is likely that only a minority of test results are contributory to diagnosis and management. Finally, it should be recognised that only specific pre-defined data elements are collected in registries, often with well-described data dictionaries, whereas routine EPR derived data includes all items but is dependent on clinical data entry and coding; this is most apparent in the &#x2018;diagnoses&#x2019; section, which in registry data is confined to the main underlying JDM related diagnosis but in the routine EPR dataset additionally includes a wide range of associated or incidental diagnoses and non-specific symptoms.</p>
            <p>The findings do, however, indicate that significant effort and cost may potentially be avoided by more widespread use of routine extracted EPR data to support, augment or replace dedicated disease-specific registries, since comparative analysis suggests that findings from both dataset types are broadly similar. However, there are differences in several aspects, such as hospital visits, medications and laboratory tests, indicating that both approaches may be optimal for particular circumstances. Therefore, optimal healthcare research should begin to question the routine setting up of registries to duplicate data held in EPR systems, and recognise that significant resource savings could be achieved by using routine EPR data wherever possible, enhanced by highly targeted registry collection for specific data elements, giving a customised hybrid approach to achieve maximum benefit.</p>
            <p>It should be emphasised that the findings presented in the present study are based on routinely extracted EPR data from a single centre, which already has an established digital research environment and underlying processes and architecture for large scale extraction, deidentification and harmonisation of electronic patient record data elements. The disease registry, in contrast, collects data from many different centres, each of which may have different electronic patient record systems, and markedly different levels of digital and data maturity. Therefore, scaling the approach of extracting and collating or mapping similar data from multiple different organisations&#x2019; clinical systems adds significant complexity with aspects such as data harmonisation, ontology mapping and unification of formatting, all of which are essentially avoided by manual entry into a research data capture tool associated with a registry. The disadvantage of this approach is that such registries require both initial setup and ongoing management resources with additional potential transcription and data entry errors, as well as intrinsic limitations to the extent of data collection since there is a human resource burden directly proportional to the number of participants and number of data elements. It is hoped that future developments towards unifying healthcare data specifications for interoperability, such as HL7 FHIRv4, may significantly reduce the complexity of multicentre data harmonisation for such use cases, but at present few clinical systems support such tools or APIs beyond basic functionality.</p>
            <p>In this rare disease example (juvenile onset dermatomyositis, annual incidence 2-3 per million children per year),
                <sup>
                    <xref ref-type="bibr" rid="ref17">17</xref>
                </sup> collection from many centres has a clear benefit to research to power studies adequately and enable cross centre comparison of outcomes and practice. In addition, the agreement of an internationally agreed data set for research and clinical use in this condition has facilitated comparisons of registries across countries.
                <sup>
                    <xref ref-type="bibr" rid="ref18">18</xref>
                </sup> In the future it would therefore be feasible to standardise the elements recorded in the EPR specific to this condition and then use routinely extracted, large datasets for research. This might provide significant savings of time and duplicated effort to the research community, enable a wider range of data elements to be incorporated into high dimensional modelling of disease outcome and so lead to significant benefit for patients.</p>
        </sec>
        <sec id="sec9">
            <title>Ethics and consent</title>
            <p>The study was approved by the appropriate REC (for JDM data: REC 01/3/022, 20/03/2023, and specific analysis was approved by the JDCBS Study Steering Committee with all patients having provided written consent for the use of their data in research; use of EPR data for research through the GOSH SDE is approved under REC reference 21/LO/0646, 13/10/2021).</p>
        </sec>
    </body>
    <back>
        <sec id="sec12" sec-type="data-availability">
            <title>Data availability statement</title>
            <sec id="sec13">
                <title>Underlying data</title>
                <p>Individual patient level data is not available since REC approvals do not support data sharing beyond the platform without additional approval. Summary data is available on request through the corresponding author.</p>
            </sec>
        </sec>
        <ack>
            <title>Acknowledgements</title>
            <p>The Juvenile Dermatomyositis Cohort Biomarker Study &amp; Repository (JDCBS) would like to thank all of the patients and their families who contributed to the JDCBS research study. We thank all local research coordinators and principal investigators who have made this research possible. Clinical, research and administrative contributors to JDCBS members were as follows:</p>
                <p>Dr Kate Armon, and Ms Louise Coke, Ms Julie Cook and Ms Amy Nichols (Norfolk and Norwich University Hospitals); Dr Liza McCann, Mr Ian Roberts, Dr Eileen Baildam, Ms Louise Hanna, Ms Olivia Lloyd, Susan Wadeson, Ms Michelle Andrews, Ms Olivia Lloyd and Mrs Jane Roach (The Royal Liverpool Children&#x2019;s Hospital, Alder Hey, Liverpool); Dr Phil Riley, Ms Ann McGovern, and Ms Verna Cuthbert (Royal Manchester Children&#x2019;s Hospital, Manchester); Dr Clive Ryder, Ms Janis Scott, Ms Beverley Thomas, Professor Taunton Southwood, Dr Eslam Al-Abadi and Ms Ruth Howman (Birmingham Children&#x2019;s Hospital, Birmingham); Dr Sue Wyatt, Mrs Gillian Jackson, Dr Mark Wood, Dr Tania Amin, Dr Vanessa VanRooyen, Ms Deborah Burton, Ms Louise Turner, Ms Heather Rostron, and Ms Sarah Hanson (Leeds General Infirmary, Leeds); Dr Joyce Davidson, Dr Janet Gardner-Medwin, Dr Neil Martin, Ms Sue Ferguson, Ms Liz Waxman and Mr Michael Browne, Ms Roisin Boyle, Ms Emily Blyth and Ms Susanne Cathcart (The Royal Hospital for Sick Children, Yorkhill, Glasgow); Dr Mark Friswell, Professor Helen Foster, Ms Alison Swift, Dr Sharmila Jandial, Ms Vicky Stevenson, Ms Debbie Wade, Dr Ethan Sen, Dr Eve Smith, Ms Lisa Qiao, Mr Stuart Watson and Ms Claire Duong, Dr Stephen Crulley, Mr Andrew Davies, Miss Caroline Miller, Ms Lynne Bell, Dr Flora McErlane, Dr Sunil Sampath, Dr Josh Bennet, Mrs Sharon King and Mr Christopher Long (Great North Children&#x2019;s Hospital, Newcastle); Dr Helen Venning, Dr Rangaraj Satyapal, Mrs Elizabeth Stretton, Ms Mary Jordan, Dr Ellen Mosley, Ms Anna Frost, Ms Lindsay Crate, Dr Kishore Warrier, Ms Stefanie Stafford, Mrs Brogan Wrest, Ms Chia-Ping Chou, and Mr Paul Pryce (Queens Medical Centre, Nottingham); Professor Lucy Wedderburn, Dr Clarissa Pilkington, Dr Nathan Hasson, Dr Muthana Al-Obadi, Dr Giulia Varnier, Dr Sandrine Lacassagne, Ms Sue Maillard, Mrs Lauren Stone, Ms Elizabeth Halkon, Ms Virginia Brown, Ms Audrey Juggins, Dr Sally Smith, Ms Sian 
Lunt, Ms Elli Enayat, Ms Hemlata Varsani, Ms Laura Kassoumeri, Miss Laura Beard, Ms Katie Arnold, Mrs Yvonne Glackin, Ms Stephanie Simou, Dr Beverley Almeida, Dr Kiran Nistala, Dr Raquel Marques, Dr Claire Deakin, Dr Parichat Khaosut, Ms Stefanie Dowle, Dr Charalampia Papadopoulou, Dr Shireena Yasin, Dr Christina Boros, Dr Meredyth Wilkinson, Dr Chris Piper, Ms Cerise Johnson-Moore, Ms Lucy Marshall, Ms Kathryn O&#x2019;Brien, Ms Emily Robinson, Mr Dominic Igbelina, Dr Polly Livermore, Dr Socrates Varakliotis, Ms Rosie Hamilton, Ms Lucy Nguyen, Mr Dario Cancemi, Dr Ovgu Kul Cinar, Dr Elena Moraitis and Dr Hannah Peckham (Great Ormond Street Hospital, London); Dr Kevin Murray (Princess Margaret Hospital, Perth, Western Australia); Dr Coziana Ciurtin, Dr John Ioannou, Mrs Caitlin Clifford, Ms Linda Suffield and Ms Laura Hennelly (University College London Hospital, London); Ms Helen Lee, Ms Sam Leach, Ms Helen Smith, Dr Anne-Marie McMahon, Ms Heather Chisem, Ms Jeanette Hall and Ms Amy Huffenberger (Sheffield&#x2019;s Children&#x2019;s Hospital, Sheffield); Dr Nick Wilkinson, Ms Emma Inness, Ms Eunice Kendall, Mr David Mayers, Ms Ruth Etherton, Ms Danielle Miller and Dr Kathryn Bailey (Oxford University Hospitals, Oxford); Dr Jacqui Clinch, Ms Natalie Fineman, Ms Helen Pluess-Hall, Ms Suzanne Sketchley, Ms Melanie Marsh, Ms Anna Fry, Ms Maisy Dawkins-Lloyd and Ms Mashal Asif (Bristol Royal Hospital for Children, Bristol); Dr Joyce Davidson, Margaret Connon and Ms Lindsay Vallance (Royal Aberdeen Children&#x2019;s Hospital); Dr Kirsty Haslam, Ms Charlene Bass-Woodcock, Ms Trudy Booth, and Ms Louise Akeroyd (Bradford Teaching Hospitals); Dr Alice Leahy, Amy Collier, Rebecca Cutts, Emma Macleod, Dr Hans De Graaf, Dr Brian Davidson, Sarah Hartfree, Ms Elizabeth Fofana and Ms Lorena Caruana (University Hospital Southampton) and all the Children, Young people and their families who have contributed to this research.</p>
        </ack>
        <ref-list>
            <title>References</title>
            <ref id="ref1">
                <label>1</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Benin</surname>
                            <given-names>AL</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Fenick</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Herrin</surname>
                            <given-names>J</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>How good are the data? Feasible approach to validation of metrics of quality derived from an outpatient electronic health record.</article-title>
                    <source>

                        <italic toggle="yes">Am. J. Med. Qual.</italic>
</source>
                    <year>2011</year>;<volume>26</volume>:<fpage>441</fpage>&#x2013;<lpage>451</lpage>.
                    <pub-id pub-id-type="pmid">21926280</pub-id>
                    <pub-id pub-id-type="doi">10.1177/1062860611403136</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref2">
                <label>2</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Weiskopf</surname>
                            <given-names>NG</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Weng</surname>
                            <given-names>C</given-names>
                        </name>
</person-group>:
                    <article-title>Methods and dimensions of electronic health record data quality assessment: enabling reuse for clinical research.</article-title>
                    <source>

                        <italic toggle="yes">J. Am. Med. Inform. Assoc.</italic>
</source>
                    <year>2013</year>;<volume>20</volume>:<fpage>144</fpage>&#x2013;<lpage>151</lpage>.
                    <pub-id pub-id-type="pmid">22733976</pub-id>
                    <pub-id pub-id-type="doi">10.1136/amiajnl-2011-000681</pub-id>
                    <pub-id pub-id-type="pmcid">PMC3555312</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref3">
                <label>3</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Reimer</surname>
                            <given-names>AP</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Milinovich</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Madigan</surname>
                            <given-names>EA</given-names>
                        </name>
</person-group>:
                    <article-title>Data quality assessment framework to assess electronic medical record data for use in research.</article-title>
                    <source>

                        <italic toggle="yes">Int. J. Med. Inform.</italic>
</source>
                    <year>2016</year>;<volume>90</volume>:<fpage>40</fpage>&#x2013;<lpage>47</lpage>.
                    <pub-id pub-id-type="pmid">27103196</pub-id>
                    <pub-id pub-id-type="doi">10.1016/j.ijmedinf.2016.03.006</pub-id>
                    <pub-id pub-id-type="pmcid">PMC4845906</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref4">
                <label>4</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Kahn</surname>
                            <given-names>MG</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Callahan</surname>
                            <given-names>TJ</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Barnard</surname>
                            <given-names>J</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>A Harmonized Data Quality Assessment Terminology and Framework for the Secondary Use of Electronic Health Record Data.</article-title>
                    <source>

                        <italic toggle="yes">EGEMS (Wash DC).</italic>
</source>
                    <year>2016</year>;<volume>4</volume>:<fpage>1244</fpage>.
                    <pub-id pub-id-type="pmid">27713905</pub-id>
                    <pub-id pub-id-type="doi">10.13063/2327-9214.1244</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref5">
                <label>5</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Broberg</surname>
                            <given-names>C</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Sklenar</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Burchill</surname>
                            <given-names>L</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Feasibility of Using Electronic Medical Record Data for Tracking Quality Indicators in Adults with Congenital Heart Disease.</article-title>
                    <source>

                        <italic toggle="yes">Congenit. Heart Dis.</italic>
</source>
                    <year>2015</year>;<volume>10</volume>:<fpage>E268</fpage>&#x2013;<lpage>E277</lpage>.
                    <pub-id pub-id-type="pmid">26239748</pub-id>
                    <pub-id pub-id-type="doi">10.1111/chd.12289</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref6">
                <label>6</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Kannan</surname>
                            <given-names>V</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Fish</surname>
                            <given-names>JS</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Mutz</surname>
                            <given-names>JM</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Rapid Development of Specialty Population Registries and Quality Measures from Electronic Health Record Data*. An Agile Framework.</article-title>
                    <source>

                        <italic toggle="yes">Methods Inf. Med.</italic>
</source>
                    <year>2017</year>;<volume>56</volume>:<fpage>e74</fpage>&#x2013;<lpage>e83</lpage>.
                    <pub-id pub-id-type="pmid">28930362</pub-id>
                    <pub-id pub-id-type="doi">10.3414/ME16-02-0031</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref7">
                <label>7</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Schouten</surname>
                            <given-names>LJ</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Jager</surname>
                            <given-names>JJ</given-names>
                        </name>

                        <name name-style="western">
                            <surname>van den Brandt</surname>
                            <given-names>PA</given-names>
                        </name>
</person-group>:
                    <article-title>Quality of cancer registry data: a comparison of data provided by clinicians with those of registration personnel.</article-title>
                    <source>

                        <italic toggle="yes">Br. J. Cancer.</italic>
</source>
                    <year>1993</year>;<volume>68</volume>:<fpage>974</fpage>&#x2013;<lpage>977</lpage>.
                    <pub-id pub-id-type="pmid">8217612</pub-id>
                    <pub-id pub-id-type="doi">10.1038/bjc.1993.464</pub-id>
                    <pub-id pub-id-type="pmcid">PMC1968711</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref8">
                <label>8</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Corey</surname>
                            <given-names>KM</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Helmkamp</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Simons</surname>
                            <given-names>M</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Assessing Quality of Surgical Real-World Data from an Automated Electronic Health Record Pipeline.</article-title>
                    <source>

                        <italic toggle="yes">J. Am. Coll. Surg.</italic>
</source>
                    <year>2020</year>;<volume>230</volume>:<fpage>295</fpage>&#x2013;<lpage>305.e12</lpage>.
                    <pub-id pub-id-type="pmid">31945461</pub-id>
                    <pub-id pub-id-type="doi">10.1016/j.jamcollsurg.2019.12.005</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref9">
                <label>9</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Brundin-Mather</surname>
                            <given-names>R</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Soo</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Zuege</surname>
                            <given-names>DJ</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Secondary EMR data for quality improvement and research: A comparison of manual and electronic data collection from an integrated critical care electronic medical record system.</article-title>
                    <source>

                        <italic toggle="yes">J. Crit. Care.</italic>
</source>
                    <year>2018</year>;<volume>47</volume>:<fpage>295</fpage>&#x2013;<lpage>301</lpage>.
                    <pub-id pub-id-type="pmid">30099330</pub-id>
                    <pub-id pub-id-type="doi">10.1016/j.jcrc.2018.07.021</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref10">
                <label>10</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Aerts</surname>
                            <given-names>H</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Kalra</surname>
                            <given-names>D</given-names>
                        </name>

                        <name name-style="western">
                            <surname>S&#x00e1;ez</surname>
                            <given-names>C</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Quality of Hospital Electronic Health Record (EHR) Data Based on the International Consortium for Health Outcomes Measurement (ICHOM) in Heart Failure: Pilot Data Quality Assessment Study.</article-title>
                    <source>

                        <italic toggle="yes">JMIR Med. Inform.</italic>
</source>
                    <year>2021</year>;<volume>9</volume>:<fpage>e27842</fpage>.
                    <pub-id pub-id-type="pmid">34346902</pub-id>
                    <pub-id pub-id-type="doi">10.2196/27842</pub-id>
                    <pub-id pub-id-type="pmcid">PMC8374665</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref11">
                <label>11</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Garies</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>McBrien</surname>
                            <given-names>K</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Quan</surname>
                            <given-names>H</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>A data quality assessment to inform hypertension surveillance using primary care electronic medical record data from Alberta, Canada.</article-title>
                    <source>

                        <italic toggle="yes">BMC Public Health.</italic>
</source>
                    <year>2021</year>;<volume>21</volume>:<fpage>264</fpage>.
                    <pub-id pub-id-type="pmid">33530975</pub-id>
                    <pub-id pub-id-type="doi">10.1186/s12889-021-10295-w</pub-id>
                    <pub-id pub-id-type="pmcid">PMC7852125</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref12">
                <label>12</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Ozonze</surname>
                            <given-names>O</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Scott</surname>
                            <given-names>PJ</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Hopgood</surname>
                            <given-names>AA</given-names>
                        </name>
</person-group>:
                    <article-title>Automating Electronic Health Record Data Quality Assessment.</article-title>
                    <source>

                        <italic toggle="yes">J. Med. Syst.</italic>
</source>
                    <year>2023</year>;<volume>47</volume>:<fpage>23</fpage>.
                    <pub-id pub-id-type="pmid">36781551</pub-id>
                    <pub-id pub-id-type="doi">10.1007/s10916-022-01892-2</pub-id>
                    <pub-id pub-id-type="pmcid">PMC9925537</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref13">
                <label>13</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Nathan</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Jacobs</surname>
                            <given-names>ML</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Gaynor</surname>
                            <given-names>JW</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Completeness and Accuracy of Local Clinical Registry Data for Children Undergoing Heart Surgery.</article-title>
                    <source>

                        <italic toggle="yes">Ann. Thorac. Surg.</italic>
</source>
                    <year>2017</year>;<volume>103</volume>:<fpage>629</fpage>&#x2013;<lpage>636</lpage>.
                    <pub-id pub-id-type="pmid">27726857</pub-id>
                    <pub-id pub-id-type="doi">10.1016/j.athoracsur.2016.06.111</pub-id>
                    <pub-id pub-id-type="pmcid">PMC5253303</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref14">
                <label>14</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Cheng</surname>
                            <given-names>C-Y</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Chiang</surname>
                            <given-names>C-J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Hsieh</surname>
                            <given-names>C-H</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Is quality of registry treatment data related to registrar experience and workload? A study of Taiwan cancer registry data.</article-title>
                    <source>

                        <italic toggle="yes">J. Formos. Med. Assoc.</italic>
</source>
                    <year>2018</year>;<volume>117</volume>:<fpage>1093</fpage>&#x2013;<lpage>1100</lpage>.
                    <pub-id pub-id-type="pmid">29329964</pub-id>
                    <pub-id pub-id-type="doi">10.1016/j.jfma.2017.12.012</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref15">
                <label>15</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Mc Cord</surname>
                            <given-names>KA</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ewald</surname>
                            <given-names>H</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Agarwal</surname>
                            <given-names>A</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Treatment effects in randomised trials using routinely collected data for outcome assessment versus traditional trials: meta-research study.</article-title>
                    <source>

                        <italic toggle="yes">BMJ.</italic>
</source>
                    <year>2021</year>;<volume>372</volume>:<fpage>n450</fpage>.
                    <pub-id pub-id-type="doi">10.1136/bmj.n450</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref16">
                <label>16</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Martin</surname>
                            <given-names>N</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Krol</surname>
                            <given-names>P</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Smith</surname>
                            <given-names>S</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>A national registry for juvenile dermatomyositis and other paediatric idiopathic inflammatory myopathies: 10 years&#x2019; experience; the Juvenile Dermatomyositis National (UK and Ireland) Cohort Biomarker Study and Repository for Idiopathic Inflammatory Myopathies.</article-title>
                    <source>

                        <italic toggle="yes">Rheumatology (Oxford).</italic>
</source>
                    <year>2011</year>;<volume>50</volume>:<fpage>137</fpage>&#x2013;<lpage>145</lpage>.
                    <pub-id pub-id-type="pmid">20823094</pub-id>
                    <pub-id pub-id-type="doi">10.1093/rheumatology/keq261</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref17">
                <label>17</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Papadopoulou</surname>
                            <given-names>C</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Chew</surname>
                            <given-names>C</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Wilkinson</surname>
                            <given-names>MGL</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Juvenile idiopathic inflammatory myositis: an update on pathophysiology and clinical care.</article-title>
                    <source>

                        <italic toggle="yes">Nat. Rev. Rheumatol.</italic>
</source>
                    <year>2023</year>;<volume>19</volume>:<fpage>343</fpage>&#x2013;<lpage>362</lpage>.
                    <pub-id pub-id-type="pmid">37188756</pub-id>
                    <pub-id pub-id-type="doi">10.1038/s41584-023-00967-9</pub-id>
                    <pub-id pub-id-type="pmcid">PMC10184643</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref18">
                <label>18</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>McCann</surname>
                            <given-names>LJ</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Pilkington</surname>
                            <given-names>CA</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Huber</surname>
                            <given-names>AM</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Development of a consensus core dataset in juvenile dermatomyositis for clinical use to inform research.</article-title>
                    <source>

                        <italic toggle="yes">Ann. Rheum. Dis.</italic>
</source>
                    <year>2018</year>;<volume>77</volume>:<fpage>241</fpage>&#x2013;<lpage>250</lpage>.
                    <pub-id pub-id-type="pmid">29084729</pub-id>
                    <pub-id pub-id-type="doi">10.1136/annrheumdis-2017-212141</pub-id>
                    <pub-id pub-id-type="pmcid">PMC5816738</pub-id>
                </mixed-citation>
            </ref>
        </ref-list>
    </back>
</article>
