<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.2 20190208//EN" "http://jats.nlm.nih.gov/publishing/1.2/JATS-journalpublishing1.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="1.2" xml:lang="en">
    <front>
        <journal-meta>
            <journal-id journal-id-type="pmc">F1000Research</journal-id>
            <journal-title-group>
                <journal-title>F1000Research</journal-title>
            </journal-title-group>
            <issn pub-type="epub">2046-1402</issn>
            <publisher>
                <publisher-name>F1000 Research Limited</publisher-name>
                <publisher-loc>London, UK</publisher-loc>
            </publisher>
        </journal-meta>
        <article-meta>
            <article-id pub-id-type="doi">10.12688/f1000research.130043.1</article-id>
            <article-categories>
                <subj-group subj-group-type="heading">
                    <subject>Research Article</subject>
                </subj-group>
                <subj-group>
                    <subject>Articles</subject>
                </subj-group>
            </article-categories>
            <title-group>
                <article-title>The identification of retro-DNAs in primate genomes as DNA transposons mobilizing via retrotransposition</article-title>
                <fn-group content-type="pub-status">
                    <fn>
                        <p>[version 1; peer review: 1 approved with reservations]</p>
                    </fn>
                </fn-group>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author" corresp="yes">
                    <name>
                        <surname>Tang</surname>
                        <given-names>Wangxiangfu</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Conceptualization</role>
                    <role content-type="http://credit.niso.org/">Data Curation</role>
                    <role content-type="http://credit.niso.org/">Formal Analysis</role>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Software</role>
                    <role content-type="http://credit.niso.org/">Validation</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Original Draft Preparation</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="corresp" rid="c1">a</xref>
                    <xref ref-type="aff" rid="a1">1</xref>
                </contrib>
                <contrib contrib-type="author" corresp="yes">
                    <name>
                        <surname>Liang</surname>
                        <given-names>Ping</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Conceptualization</role>
                    <role content-type="http://credit.niso.org/">Funding Acquisition</role>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <role content-type="http://credit.niso.org/">Project Administration</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Supervision</role>
                    <role content-type="http://credit.niso.org/">Validation</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <uri content-type="orcid">https://orcid.org/0000-0003-4423-0636</uri>
                    <xref ref-type="corresp" rid="c2">b</xref>
                    <xref ref-type="aff" rid="a1">1</xref>
                    <xref ref-type="aff" rid="a2">2</xref>
                </contrib>
                <aff id="a1">
                    <label>1</label>Department of Biological Sciences, Brock University, St. Catharines, Ontario, L2S 3A1, Canada</aff>
                <aff id="a2">
                    <label>2</label>Centre of Biotechnology, Brock University, St. Catharines, Ontario, L2S 3A1, Canada</aff>
            </contrib-group>
            <author-notes>
                <corresp id="c1">
                    <label>a</label>
                    <email xlink:href="mailto:tangwanxiangfu@gmail.com">tangwanxiangfu@gmail.com</email>
                </corresp>
                <corresp id="c2">
                    <label>b</label>
                    <email xlink:href="mailto:pliang@brocku.ca">pliang@brocku.ca</email>
                </corresp>
                <fn fn-type="conflict">
                    <p>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>9</day>
                <month>3</month>
                <year>2023</year>
            </pub-date>
            <pub-date pub-type="collection">
                <year>2023</year>
            </pub-date>
            <volume>12</volume>
            <elocation-id>255</elocation-id>
            <history>
                <date date-type="accepted">
                    <day>28</day>
                    <month>2</month>
                    <year>2023</year>
                </date>
            </history>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2023 Tang W and Liang P</copyright-statement>
                <copyright-year>2023</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access article distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <self-uri content-type="pdf" xlink:href="https://f1000research.com/articles/12-255/pdf"/>
            <abstract>
                <p>
                    <bold>Background:</bold> Mobile elements (MEs) constitute a major portion of the genome in primates and other higher eukaryotes, and they play an important role in genome evolution and gene function. MEs can be divided into two fundamentally different classes: DNA transposons which transpose in the genome in a &#x201c;cut-and-paste&#x201d; style, and retrotransposons which propagate in a &#x201c;copy-and-paste&#x201d; fashion via a process involving transcription and reverse-transcription. In primate genomes, DNA transposons are mostly dead, while many retrotransposons are still highly active. We report here the identification of a new type of MEs, which we call &#x201c;retro-DNAs&#x201d;, for their combined characteristics of these two fundamentally different ME classes.</p>
                <p>
                    <bold>Methods:</bold> A comparative computational genomic approach was used to analyze the reference genome sequences of 10 primate species consisting of five apes, four monkeys, and marmoset.</p>
                <p>
                    <bold>Results:</bold> From our analysis, we identified a total of 1,750 retro-DNAs, representing 748 unique insertion events in the genomes of ten primate species including human. These retro-DNAs contain sequences of DNA transposons but lack the terminal inverted repeats (TIRs), the hallmark of DNA transposons. Instead, they show characteristics of retrotransposons, such as polyA tails, longer target-site duplications (TSDs), and the &#x201c;TT/AAAA&#x201d; insertion site motif, suggesting the use of the L1-based 
                    <underline>t</underline>arget-
                    <underline>p</underline>rimed 
                    <underline>r</underline>everse 
                    <underline>t</underline>ranscription (TPRT) mechanism. At least 40% of these retro-DNAs are located in genic regions, presenting the potential to impact gene function. More interestingly, some retro-DNAs, as well as their parent sites, show certain levels of expression, suggesting that they have the potential to create more retro-DNA copies in the present primate genomes.</p>
                <p>
                    <bold>Conclusions:</bold> Although small in number, the identification of these retro-DNAs reveals a new mechanism for propagating DNA transposons in primate genomes without active canonical DNA transposon activity. Our data also suggest that the TPRT machinery may transpose a wider variety of DNA sequences in the genomes.</p>
            </abstract>
            <kwd-group kwd-group-type="author">
                <kwd>Primates</kwd>
                <kwd>DNA transposons</kwd>
                <kwd>Retrotransposons</kwd>
                <kwd>Retro-DNA</kwd>
                <kwd>Target-primed reverse transcription</kwd>
            </kwd-group>
            <funding-group>
                <award-group id="fund-1" xlink:href="http://dx.doi.org/10.13039/100012171">
                    <funding-source>Ontario Research Foundation</funding-source>
                </award-group>
                <award-group id="fund-2" xlink:href="http://dx.doi.org/10.13039/501100000038">
                    <funding-source>Natural Sciences and Engineering Research Council of Canada</funding-source>
                    <award-id>RGPIN-2017-06785</award-id>
                </award-group>
                <award-group id="fund-3" xlink:href="http://dx.doi.org/10.13039/501100001805">
                    <funding-source>Canada Foundation for Innovation</funding-source>
                </award-group>
                <award-group id="fund-4" xlink:href="http://dx.doi.org/10.13039/501100001804">
                    <funding-source>Canada Research Chairs</funding-source>
                </award-group>
                <funding-statement>This research was supported by the Natural Sciences and Engineering Research Council of Canada (RGPIN-2017-06785).</funding-statement>
                <funding-statement>
                    <italic>The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript.</italic>
                </funding-statement>
            </funding-group>
        </article-meta>
    </front>
    <body>
        <sec id="sec1" sec-type="intro">
            <title>Introduction</title>
            <p>Mobile elements (MEs), also known as transposable elements, collectively constitute significant portions of the genomes for most higher organisms, being around 50% for primates.
                <sup>
                    <xref ref-type="bibr" rid="ref1">1</xref>
                </sup>
                <sup>&#x2013;</sup>
                <sup>
                    <xref ref-type="bibr" rid="ref4">4</xref>
                </sup> Despite being initially considered &#x201c;junk&#x201d; DNA, research from the last few decades has demonstrated that MEs make significant contributions to genome evolution and impact gene function via a variety of mechanisms. These mechanisms include, but are not limited to, generation of insertional mutations and genomic instability, creation of new genes and splicing isoforms, exon shuffling, and alteration of gene expression and epigenetic regulation.
                <sup>
                    <xref ref-type="bibr" rid="ref5">5</xref>
                </sup>
                <sup>&#x2013;</sup>
                <sup>
                    <xref ref-type="bibr" rid="ref17">17</xref>
                </sup>
            </p>
            <p>Based on the type of the transposition intermediate, MEs can be divided into two major classes: Class I, called &#x201c;retrotransposons&#x201d;, that utilize an RNA-intermediate to transpose in a &#x201c;copy-and-paste&#x201d; fashion, and Class II termed &#x201c;DNA transposons&#x201d;, that employ a DNA-intermediate to transpose in a &#x201c;cut-and-paste&#x201d; style. Furthermore, despite both having target site duplications (TSDs), the two ME classes differ in sequence characteristics, including consensus sequences unique to each class/subclass, distinct TSD length profile, and presence or absence of terminal inverted repeats (TIRs) or polyA tail, and others.
                <sup>
                    <xref ref-type="bibr" rid="ref17">17</xref>
                </sup>
                <sup>&#x2013;</sup>
                <sup>
                    <xref ref-type="bibr" rid="ref19">19</xref>
                </sup>
            </p>
            <p>Retrotransposons represent the majority of MEs in primate genomes, owing to their &#x201c;copy-and-paste&#x201d; style transposition, which results in direct copy number increase over time, coupled with their continuing activity over the course of evolution up to the current time. In this process, a retrotransposon is first transcribed into RNA, which is then reverse-transcribed into DNA as a new copy inserting into a new location in the genome.
                <sup>
                    <xref ref-type="bibr" rid="ref20">20</xref>
                </sup> Retrotransposons can be divided into two major subtypes: the long terminal repeats (LTR) and non-LTR retrotransposons, with the former carrying two LTRs flanking the internal viral sequences, while the latter lack LTRs but mostly carry a polyA tail.
                <sup>
                    <xref ref-type="bibr" rid="ref1">1</xref>
                </sup> LTRs represent domesticated retroviruses from those infecting the germline cells of the ancestors and becoming integrated into the host genome, and for this reason, they are also called endogenous retroviruses (ERVs).
                <sup>
                    <xref ref-type="bibr" rid="ref21">21</xref>
                </sup>
                <sup>,</sup>
                <sup>
                    <xref ref-type="bibr" rid="ref22">22</xref>
                </sup> In primate genomes, LTRs exist either as full-length LTRs, which can be as long as 10 kb, or as solo-LTRs around 1 kb in length as a product of post-insertion homology-based recombination between the two LTRs, which removes the long internal viral sequences. With several hundred thousand copies, LTRs contribute to ~9% of the genomes with relatively low levels of ongoing activity.
                <sup>
                    <xref ref-type="bibr" rid="ref23">23</xref>
                </sup>
                <sup>&#x2013;</sup>
                <sup>
                    <xref ref-type="bibr" rid="ref26">26</xref>
                </sup>
            </p>
            <p>The non-LTR retrotransposons, as the most successful MEs in primate genomes, contribute to more than 35% of the genomes and more than 80% of all MEs in these genomes with several millions of copies.
                <sup>
                    <xref ref-type="bibr" rid="ref3">3</xref>
                </sup>
                <sup>,</sup>
                <sup>
                    <xref ref-type="bibr" rid="ref4">4</xref>
                </sup> From their sequence features, the currently known non-LTR MEs in primate genomes belong to four subclasses, including short-interspersed nuclear elements (SINEs), long-interspersed nuclear elements (LINEs), SINE-R/VNTR/Alu (SVAs), and processed pseudogenes (
                <italic toggle="yes">i.e.</italic> retro-copies of mRNAs, also called retro-genes).
                <sup>
                    <xref ref-type="bibr" rid="ref4">4</xref>
                </sup>
                <sup>,</sup>
                <sup>
                    <xref ref-type="bibr" rid="ref27">27</xref>
                </sup>
                <sup>&#x2013;</sup>
                <sup>
                    <xref ref-type="bibr" rid="ref31">31</xref>
                </sup> Despite having many differences with regard to their length, consensus sequences, and coding capacity, all subclasses of non-LTR retrotransposons share the common properties of having a 3&#x2019;-polyA tail and the use of the target-primed reverse transcription (TPRT) mechanism for retrotransposition.
                <sup>
                    <xref ref-type="bibr" rid="ref31">31</xref>
                </sup>
                <sup>,</sup>
                <sup>
                    <xref ref-type="bibr" rid="ref32">32</xref>
                </sup> Among them, LINE-1s (L1s) as the only subfamily of autonomous non-LTR retrotransposons in the primate genomes provide the TPRT machinery for all other non-autonomous non-LTR retrotransposons. For this reason, all non-LTR retrotransposons share the same &#x201c;TT/AAAA&#x201d; sequence motif at their insertion sites.
                <sup>
                    <xref ref-type="bibr" rid="ref9">9</xref>
                </sup>
                <sup>,</sup>
                <sup>
                    <xref ref-type="bibr" rid="ref32">32</xref>
                </sup>
                <sup>&#x2013;</sup>
                <sup>
                    <xref ref-type="bibr" rid="ref36">36</xref>
                </sup>
            </p>
            <p>In contrast, DNA transposons, initially known as &#x201c;jumping genes&#x201d;, move in genomes using a transposase encoded by autonomous copies.
                <sup>
                    <xref ref-type="bibr" rid="ref1">1</xref>
                </sup> Ten out of the twelve DNA transposon superfamilies are known to excise themselves out from their original locations as double-stranded DNA and move to new sites in the genome, which leads to no direct change in their copy numbers.
                <sup>
                    <xref ref-type="bibr" rid="ref17">17</xref>
                </sup>
                <sup>,</sup>
                <sup>
                    <xref ref-type="bibr" rid="ref19">19</xref>
                </sup> Two of the superfamilies, 
                <italic toggle="yes">Helitrons</italic> and 
                <italic toggle="yes">Mavericks</italic>, transpose through non-canonical mechanisms by utilizing a single-stranded DNA as intermediate, which leads to a &#x201c;copy-and-paste&#x201d; style.
                <sup>
                    <xref ref-type="bibr" rid="ref17">17</xref>
                </sup>
                <sup>,</sup>
                <sup>
                    <xref ref-type="bibr" rid="ref37">37</xref>
                </sup>
                <sup>,</sup>
                <sup>
                    <xref ref-type="bibr" rid="ref38">38</xref>
                </sup> The ten &#x201c;cut-and-paste&#x201d; DNA transposon superfamilies, as well as 
                <italic toggle="yes">Mavericks</italic>, have TIRs and TSDs, while 
                <italic toggle="yes">Helitrons</italic> is the only superfamily with neither TIRs nor TSDs, owing to its rolling-circle mechanism.
                <sup>
                    <xref ref-type="bibr" rid="ref17">17</xref>
                </sup>
                <sup>,</sup>
                <sup>
                    <xref ref-type="bibr" rid="ref37">37</xref>
                </sup> In addition to these aforementioned DNA transposons, there is another group of DNA transposons named miniature inverted-repeat transposable elements (MITEs) characterized by the presence of both TSDs and TIRs yet lacking the coding capacity for the transposase.
                <sup>
                    <xref ref-type="bibr" rid="ref39">39</xref>
                </sup> By using DNA transposases encoded by other autonomous DNA transposons, these non-autonomous, short (50-600bp) MITE entries can transpose in the host genome.
                <sup>
                    <xref ref-type="bibr" rid="ref17">17</xref>
                </sup>
                <sup>,</sup>
                <sup>
                    <xref ref-type="bibr" rid="ref40">40</xref>
                </sup>
            </p>
            <p>DNA transposons have been considered inactive in the current primate genomes and have received very little research attention. Lander 
                <italic toggle="yes">et al.</italic> (2001) in their initial human genome analysis concluded that there was no evidence for DNA transposon activity during the past 50 My,
                <sup>
                    <xref ref-type="bibr" rid="ref2">2</xref>
                </sup> while a later study suggested that DNA transposons had been highly active during the early part of primate evolution till ~37 Mya.
                <sup>
                    <xref ref-type="bibr" rid="ref19">19</xref>
                </sup> There has been no report for lineage-specific or species-specific DNA transposons in primate genomes. However, in our recent comparative analysis of species-specific MEs in eight primates from the 
                <italic toggle="yes">Hominidae</italic> and the 
                <italic toggle="yes">Cercopithecidae</italic> families, there was also a total of 2,405 DNA transposons identified to be species-specific in addition to the 228,450 species-specific retrotransposons.
                <sup>
                    <xref ref-type="bibr" rid="ref36">36</xref>
                </sup> As part of efforts to understand the mechanism(s) underlying these species-specific DNA transposons, we performed further comparative analysis across ten primate genomes and identified a new type of non-LTR retrotransposons that have sequences from DNA transposons, but also show some hallmarks of L1-based retrotransposons, which we called &#x201c;retro-DNAs&#x201d;.</p>
        </sec>
        <sec id="sec2" sec-type="results">
            <title>Results</title>
            <sec id="sec3">
                <title>Overall profiles of DNA transposons and lineage-specific retro-DNAs in the ten primate genomes</title>
                <p>To identify all retro-DNA events in the primate genomes, we first identified the diallelic DNA transposons (da-DNAs) that are defined as DNA transposons with both the insertion allele and pre-integration allele identifiable in these genomes. These DNA transposons are likely to be the results of relatively recent transposition events shown as having a low level of sequence divergence from their parent copies, which permits accurate identification of TSDs and TIRs. The starting lists of DNA transposons were based on the RepeatMasker annotation subjected to a consolidation process to ensure the accuracy in identifying DNA transposons with both insertion and pre-integration alleles as well as their TSDs.
                    <sup>
                        <xref ref-type="bibr" rid="ref3">3</xref>
                    </sup>
                    <sup>,</sup>
                    <sup>
                        <xref ref-type="bibr" rid="ref36">36</xref>
                    </sup> One main type of target for integration in this case is the ME entries split by insertion of other MEs and non-ME sequences. As shown in 
                    <xref ref-type="table" rid="T1">Table 1</xref>, the number of DNA transposons in the primate genomes dropped ~18% on average after integration, leading to less variation in their numbers across genomes ranging from 324,288 in marmoset to 421,580 in chimpanzee, and averaging 376,720 copies per genome versus 459,521 per genome before integration. These DNA transposons contributed to a total of ~98 Mbp or ~3.6% of these primate genomes on average (
                    <xref ref-type="table" rid="T1">Table 1</xref>). Various factors could have contributed to the different DNA transposon numbers in these genomes, including, but not limited to, the differences in the versions of RepeatMasker and the ME reference sequences used for ME annotation, the quality of genome assemblies, and probably most importantly the different evolutionary histories of the individual genomes.</p>
                <table-wrap id="T1" orientation="portrait" position="float">
                    <label>Table 1. </label>
                    <caption>
                        <title>Summary of DNA transposons in the 10 primate genomes.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">Genomes</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Raw counts</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">integrated counts</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">% count reduction</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">total size (bp)</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">% genome</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">full-length count</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">% full-length</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">diallelic DNA counts</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>hg38 (human)</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">483,994</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">399,590</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">17</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">102,664,356</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">3.5</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">119,368</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">29.9</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">25,933</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>panTro5 (chimp)</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">510,250</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">421,580</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">17</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">107,832,154</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">3.8</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">119,265</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">28.3</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">28,273</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>gorGor4 (gorilla)</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">503,480</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">418,454</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">17</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">106,573,049</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">3.8</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">117,263</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">28.0</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">27,386</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>ponAbe2 (orangutan)</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">429,467</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">347,471</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">19</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">93,420,030</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">3.4</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">113,425</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">32.6</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">23,923</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>nomLeu3 (gibbon)</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">438,800</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">363,738</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">17</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">93,531,426</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">3.6</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">108,334</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">29.8</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">24,206</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>macFas5 (crab-eating macaque)</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">443,909</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">359,802</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">19</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">94,910,440</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">3.5</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">109,444</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">30.4</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">26,218</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>rheMac8 (rhesus)</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">486,991</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">401,546</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">18</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">102,546,356</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">3.7</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">111,558</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">27.8</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">28,149</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>papAnu2 (baboon)</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">459,662</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">369,684</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">20</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">97,943,467</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">3.7</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">109,523</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">29.6</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">25,844</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>chlSab2 (green monkey)</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">445,724</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">361,048</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">19</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">95,097,218</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">3.5</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">108,139</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">30.0</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">26,252</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>calJac3 (marmoset)</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">392,937</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">324,288</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">17</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">83,220,943</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">3.2</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">91,946</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">28.4</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">34,901</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>Average</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">459,521</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">376,720</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">18</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">97,773,944</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">3.6</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">110,827</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">29.5</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">27,109</td>
                            </tr>
                        </tbody>
                    </table>
                    <table-wrap-foot>
                        <fn-group content-type="footnotes">
                            <fn id="tfn1">
                                <label>
                                    <sup>*</sup>
                                </label>
                                <p>Full-length is defined as &#x2265;90% of the consensus length.</p>
                            </fn>
                        </fn-group>
                    </table-wrap-foot>
                </table-wrap>
                <p>Using a multi-way comparative genomics approach modified from our previous analysis of human-specific MEs,
                    <sup>
                        <xref ref-type="bibr" rid="ref36">36</xref>
                    </sup> we identified a total of 271,085 da-DNAs in the 10 primate genomes (
                    <xref ref-type="table" rid="T1">Table 1</xref>). Specifically, for each da-DNA, we require the presence of a pre-integration allele in at least one of the other nine genomes. As shown in 
                    <xref ref-type="table" rid="T1">Table 1</xref>, the number of da-DNAs varied from 23,923 in the orangutan genome to 34,901 in the marmoset genome, averaging 27,109 across the 10 genomes. The largest number of da-DNAs in the marmoset was expected, given its largest evolutionary distance from the remaining primate species. Notable differences were also seen between the most closely related genome pairs among the 10 genomes, for which these numbers are directly comparable. For example, between the human and chimpanzee genomes, the latter had &gt;10% more da-DNAs than the former (28,273 
                    <italic toggle="yes">versus</italic> 25,933), while between the two macaques, the rhesus genome had ~10% more than the crab-eating macaque genome (28,149 
                    <italic toggle="yes">versus</italic> 26,218) (
                    <xref ref-type="table" rid="T1">Table 1</xref>). In comparison, the species-specific non-LTR retrotransposons in the crab-eating macaque genome were less than 1/8 of that for the rhesus genome (3,039 versus 25,085),
                    <sup>
                        <xref ref-type="bibr" rid="ref3">3</xref>
                    </sup> indicating at least that the higher number of da-DNAs in the rhesus genome was not due to genome sequence quality differences.</p>
                <p>By composition in DNA transposon type, the majority of the da-DNAs belonged to the hAT and TcMar superfamilies with the hAT subfamilies (
                    <italic toggle="yes">hAT-Charlie</italic> and hAT-Tip100) contributing to ~57% of da-DNAs and the 
                    <italic toggle="yes">hAT-Charlie</italic> subfamily alone contributing to ~50% of all da-DNAs in all genomes (Table S1, 
                    <xref ref-type="fig" rid="f1">Figure 1A</xref>). The two TcMar families, 
                    <italic toggle="yes">TcMar-Tigger</italic> and 
                    <italic toggle="yes">TcMar-Mariner</italic>, contributed ~33% of da-DNAs, while the remaining families contributed ~10% of da-DNAs. This composition pattern seems to be quite similar among all genomes, with the orangutan genome having a slightly lower portion from the hAT-Tip100 and 
                    <italic toggle="yes">TcMar-Tigger</italic> families but slightly more from the other families (
                    <xref ref-type="fig" rid="f1">Figure 1A</xref>, Table S1).</p>
                <fig fig-type="figure" id="f1" orientation="portrait" position="float">
                    <label>Figure 1. </label>
                    <caption>
                        <title>The composition of diallelic DNA transposons and retro-DNAs by family in the ten primate genomes.</title>
                        <p>Horizontal stacked bar charts showing the family composition of diallelic DNA transposons (A) and retro-DNAs (B) in each of the 10 primate genomes. The color scheme is the same for both panels.</p>
                    </caption>
                    <graphic id="gr1" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/142770/d3f2af48-2278-4674-87e2-662a8a23b2f0_figure1.gif"/>
                </fig>
            </sec>
            <sec id="sec4">
                <title>Retro-DNAs in the primate genomes possess non-LTR retrotransposon sequence characteristics</title>
                <p>While analyzing the da-DNAs in detail for understanding the possible mechanisms involved, we came across an unusual case of a 201-bp 
                    <italic toggle="yes">Tigger7</italic> DNA transposon from the 
                    <italic toggle="yes">TcMar-Tigger</italic> family located at 
                    <italic toggle="yes">chr4:146335052-146335253</italic> of the human genome (GRCh38), which appears to be a human-specific ME for its absence in the orthologous region in the chimp genome (
                    <xref ref-type="fig" rid="f2">Figure 2A</xref>). Interestingly, this DNA transposon insertion has a 14 bp TSD &#x201c;AAGAGTCCTGGATC&#x201d; that is much longer than TSDs for DNA transposons, and it has no identifiable TIR typical of a DNA transposon (
                    <xref ref-type="fig" rid="f2">Figure 2A</xref>). Furthermore, it has a 27 bp polyA tail at its 3&#x2019;-end and a predicted polyadenylation signal &#x201c;ATTAAA&#x201d; before the polyA tail, all pointing to a non-LTR retrotransposon rather than a canonical 
                    <italic toggle="yes">Tigger7</italic> DNA transposon, which is expected to have TIRs and 2 bp (TA) TSDs. We therefore named it a &#x201c;retro-DNA&#x201d; for being a retrotransposon-like element derived from a DNA transposon sequence.</p>
                <fig fig-type="figure" id="f2" orientation="portrait" position="float">
                    <label>Figure 2. </label>
                    <caption>
                        <title>Examples of retro-DNAs in different primate genomes.</title>
                        <p>A. A retro-DNA from the human genome (hg38_chr4:146335052-146335253) with the pre-integration allele from the chimpanzee genome (panTro5_chr4:38758218-38758438). B. A retro-DNA from the green monkey genome (chlSab2_chr8:30005081-30005527) with the pre-integration allele from the gibbon genome (nomLeu3_chr8:37535028-37535236). C. A retro-DNA from the green monkey genome (chlSab2_chrX:73456937-73457324) with the pre-integration allele from the orangutan genome (ponAbe2_chrX:82896142-82896360). D. A retro-DNA from the human genome (hg38_chr4:38758216-38758442) with the pre-integration allele from the green monkey genome (chlSab2_chr27:11529606-11529817). In each panel, the sequence at the top is the insertion allele containing the retro-DNA, and the sequence at the bottom is the pre-integration allele without the retro-DNA. The yellow boxes indicate TSDs, the blue boxes indicate the DNA transposon sequences, while the purple boxes indicate possible polyA tail sequences.</p>
                    </caption>
                    <graphic id="gr2" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/142770/d3f2af48-2278-4674-87e2-662a8a23b2f0_figure2.gif"/>
                </fig>
                <p>Following the identification of this retro-DNA, we searched the human genome and other primate genomes and identified more similar cases, as exemplified in 
                    <xref ref-type="fig" rid="f2">Figure 2B-D</xref>. For instance, a 446 bp 
                    <italic toggle="yes">Charlie1a</italic> fragment from the 
                    <italic toggle="yes">hAT-Charlie</italic> family was identified as a retro-DNA in the genomes of three primates (green monkey, rhesus, and crab-eating macaque), which has 13-bp-long TSDs but no TIRs (
                    <xref ref-type="fig" rid="f2">Figure 2B</xref>).</p>
                <p>By requiring the presence of longer TSDs (&#x2265;8 bp) and the absence of TIRs, we identified a total of 1,750 retro-DNA entries among all da-DNAs using a workflow shown in 
                    <xref ref-type="fig" rid="f3">Figure 3</xref>. By classification, these retro-DNAs consist of 847, 478, 156, 74, and 195 entries from the 
                    <italic toggle="yes">hAT-Charlie</italic>, 
                    <italic toggle="yes">TcMar-Tigger</italic>, hAT-Tip100, 
                    <italic toggle="yes">TcMar-Mariner</italic>, and other families, respectively (
                    <xref ref-type="table" rid="T2">Table 2</xref>). The composition pattern (
                    <xref ref-type="fig" rid="f1">Figure 1B</xref>) was very similar to that of all da-DNAs (
                    <xref ref-type="fig" rid="f1">Figure 1A</xref>), indicating there is no strong bias for retro-DNA towards any particular subfamily among da-DNAs. However, at the genome level, the ratios of retro-DNAs in the orangutan genome from the hAT-Tip100 and 
                    <italic toggle="yes">TcMar-Tigger</italic> families were much lower, while that from the &#x201c;other&#x201d; families was much higher compared to other genomes (25% versus 10%) (
                    <xref ref-type="fig" rid="f1">Figure 1B</xref>). As seen in 
                    <xref ref-type="table" rid="T2">Table 2</xref>, the 1,750 retro-DNAs encompassed all 10 genomes and could be clustered into 748 unique retro-DNA insertion events based on their orthologous relationships. It is worth noting that our list of retro-DNAs may suffer from a certain level of false negatives and false positives due to the use of a set of criteria that might not be optimal, as well as the challenges associated with the analysis of MEs and the deficiencies of the reference genome resources, especially for the non-human primates, as discussed in our recent study.
                    <sup>
                        <xref ref-type="bibr" rid="ref3">3</xref>
                    </sup>
                </p>
                <fig fig-type="figure" id="f3" orientation="portrait" position="float">
                    <label>Figure 3. </label>
                    <caption>
                        <title>A flow chart for identification of retro-DNAs.</title>
                    </caption>
                    <graphic id="gr3" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/142770/d3f2af48-2278-4674-87e2-662a8a23b2f0_figure3.gif"/>
                </fig>
                <table-wrap id="T2" orientation="portrait" position="float">
                    <label>Table 2. </label>
                    <caption>
                        <title>The distribution of retro-DNAs by subfamilies in the 10 primate genomes.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">DNA transposon family</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Human</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Chimpanzee</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Gorilla</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Orangutan</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Gibbon</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Crab-eating macaque</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Rhesus</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Baboon</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Green monkey</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Marmoset</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Total</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Total (nr)</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">hAT-Charlie</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">100</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">108</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">99</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">58</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">72</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">76</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">79</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">76</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">78</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">101</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">847</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">317</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">hAT-Tip100</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">19</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">17</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">18</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">10</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">19</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">16</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">16</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">13</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">13</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">15</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">156</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">63</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">TcMar-Tigger</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">44</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">51</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">49</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">28</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">47</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">49</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">57</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">36</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">58</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">59</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">478</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">221</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">TcMar-Mariner</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">7</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">8</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">8</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">2</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">6</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">7</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">8</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">7</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">6</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">15</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">74</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">34</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Others</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">17</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">18</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">17</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">56</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">12</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">15</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">17</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">11</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">15</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">17</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">195</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">113</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>All Retro-DNAs</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>187</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>202</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>191</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>154</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>156</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>163</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>177</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>143</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>170</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>207</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>1,750</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>748</bold>
                                </td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
                <p>By sequence length, the 748 non-redundant retro-DNAs (i.e., after removing orthologous redundancy; 
                    <xref ref-type="table" rid="T2">Table 2</xref>) averaged 209 bp (&#x00b1;190 bp) in length, representing in all cases only part of the corresponding family consensus sequences (averaging 21%) (
                    <xref ref-type="table" rid="T3">Table 3</xref>). While the consensus sequences for DNA transposon families differ in length significantly, ranging from 380 bp for 
                    <italic toggle="yes">TcMar-Mariner</italic> to 1,506 bp for hAT-Tip100, the average length of retro-DNAs seems to be relatively more consistent across the families, ranging from 122 bp for 
                    <italic toggle="yes">TcMar-Mariner</italic> to 251 bp for 
                    <italic toggle="yes">TcMar-Tigger</italic>. Nevertheless, in general, the retro-DNAs from the longer families do have a longer average length (
                    <italic toggle="yes">e.g.</italic> hAT-Tip100) than those from the shorter families, but at lower proportions of their consensus sequences than those with shorter consensus sequences (
                    <italic toggle="yes">e.g. TcMar-Mariner</italic>) (
                    <xref ref-type="table" rid="T3">Table 3</xref>).</p>
                <table-wrap id="T3" orientation="portrait" position="float">
                    <label>Table 3. </label>
                    <caption>
                        <title>The composition of retro-DNA by family and the size information.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">DNA transposon Family</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">copy number</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">% of all retro-DNAs</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Average size (bp)</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Std (bp)</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Average consensus length (bp)</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">% of consensus</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">hAT-Charlie</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">317</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">42.4</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">190</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">110</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">515</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">37</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">TcMar-Tigger</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">221</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">29.5</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">251</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">256</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">1,162</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">22</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">hAT-Tip100</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">63</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">8.4</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">200</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">209</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">1,506</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">13</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">TcMar-Mariner</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">34</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">4.5</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">122</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">115</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">380</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">32</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Other</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">113</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">15.1</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">210</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">200</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">1,053</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">20</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>Total</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>748</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">100</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>209</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>190</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>923</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>21</bold>
                                </td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
                <p>Additionally, we examined whether there were any hotspots in these DNA transposon sequences as the source sequences of these retro-DNAs. By using the retro-DNA entries from the Tigger1 DNA transposon subfamily, which is the largest subfamily containing 41 non-redundant retro-DNAs, we generated a frequency plot to show the usage of the consensus sequences by the retro-DNAs. As illustrated in 
                    <xref ref-type="fig" rid="f4">Figure 4</xref>, while all regions of the consensus sequence were covered by the 41 retro-DNAs, the frequency varied substantially from 2.4% to 29.3%, showing that a few regions of the consensus sequence (
                    <italic toggle="yes">e.g.</italic> ~1310-1440 bp and ~1840-2240 bp) were used more frequently than the rest of the regions.</p>
                <fig fig-type="figure" id="f4" orientation="portrait" position="float">
                    <label>Figure 4. </label>
                    <caption>
                        <title>A frequency plot showing the usage of the Tigger1 subfamily DNA transposon consensus sequence by retro-DNA sequences.</title>
                        <p>The plot is based on the data for a total of 41 non-redundant retro-DNA entries from the Tigger1 subfamily.</p>
                    </caption>
                    <graphic id="gr4" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/142770/d3f2af48-2278-4674-87e2-662a8a23b2f0_figure4.gif"/>
                </fig>
                <p>From the total 748 non-redundant retro-DNAs, we identified 176 entries carrying a potential polyA tail (Table S2). We speculate that the relatively low percentage (23.5%) of entries with a polyA tail might be partially due to quicker sequence divergence from post-insertion mutations in the polyA tail regions, which are more prone to random mutations than other regions due to their homopolymer nature. The complete list of the 748 non-redundant retro-DNA entries with their genomic coordinates in all applicable genomes is provided in Supplementary File 1. For these retro-DNA insertion events, we further examined the sequence motifs at the insertion sites and the TSD length distribution pattern. As shown in 
                    <xref ref-type="fig" rid="f5">Figure 5A</xref>, a sequence motif of &#x2018;TT/AAAA&#x2019;, the same as the motif for 
                    <italic toggle="yes">Alu</italic>s, L1s, and SVAs (
                    <xref ref-type="fig" rid="f5">Figure 5B</xref>),
                    <sup>
                        <xref ref-type="bibr" rid="ref32">32</xref>
                    </sup>
                    <sup>,</sup>
                    <sup>
                        <xref ref-type="bibr" rid="ref36">36</xref>
                    </sup>
                    <sup>,</sup>
                    <sup>
                        <xref ref-type="bibr" rid="ref41">41</xref>
                    </sup> was observed, despite the signal being much weaker. This, nevertheless, serves as a strong indication of their use of the L1-based TPRT machinery.
                    <sup>
                        <xref ref-type="bibr" rid="ref33">33</xref>
                    </sup>
                    <sup>,</sup>
                    <sup>
                        <xref ref-type="bibr" rid="ref34">34</xref>
                    </sup> As further support, the TSD length distribution peaked at 8 bp (
                    <xref ref-type="fig" rid="f5">Figure 5C</xref>), similar to the second peak seen for the TSDs of human-specific L1s, despite missing the major peak at 15 bp observed for the latter (
                    <xref ref-type="fig" rid="f5">Figure 5D</xref>).
                    <sup>
                        <xref ref-type="bibr" rid="ref36">36</xref>
                    </sup>
                </p>
                <fig fig-type="figure" id="f5" orientation="portrait" position="float">
                    <label>Figure 5. </label>
                    <caption>
                        <title>Sequence motifs of pre-integration sites and the target site duplication (TSD) length distribution pattern for retro-DNAs.</title>
                        <p>A. Sequence motif logos for retro-DNAs at the integration sites. B. Sequence motif logos for human-specific L1s at the integration sites, adapted from the authors&#x2019; publication.
                            <sup>
                                <xref ref-type="bibr" rid="ref3">3</xref>
                            </sup> C. A line plot showing the distribution of TSD length for retro-DNAs. D. A line plot showing the distribution of TSD length for human-specific L1s, adapted from the authors&#x2019; publication.
                            <sup>
                                <xref ref-type="bibr" rid="ref3">3</xref>
                            </sup>
                        </p>
                    </caption>
                    <graphic id="gr5" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/142770/d3f2af48-2278-4674-87e2-662a8a23b2f0_figure5.gif"/>
                </fig>
            </sec>
            <sec id="sec5">
                <title>The species- and lineage-specific pattern of retro-DNAs</title>
                <p>We examined the evolutionary timeline of the retro-DNA insertion events by mapping them onto a phylogenetic tree of these primates based on the data in the 
                    <ext-link ext-link-type="uri" xlink:href="http://www.timetree.org">TimeTree database</ext-link>.
                    <sup>
                        <xref ref-type="bibr" rid="ref43">43</xref>
                    </sup> As shown in 
                    <xref ref-type="fig" rid="f6">Figure 6A</xref> (the insert), 450 (60.2%) of these retro-DNAs appeared to be species-specific for being uniquely present in only one genome, while another 295 (39.4%) were found in multiple genomes in a clear lineage-specific pattern. On average, a retro-DNA was shared by two genomes, suggesting an average age older than the species-specific MEs (unique to one species) reported in our earlier study.
                    <sup>
                        <xref ref-type="bibr" rid="ref3">3</xref>
                    </sup> The example shown in 
                    <xref ref-type="fig" rid="f2">Figure 2A</xref> serves as a very clear case of species-specific retro-DNA. As shown in the multiple sequence alignments with its orthologous sequences including its flanking sequences from the other eight primate genomes (not locatable in the marmoset genome), this 
                    <italic toggle="yes">Tigger7</italic> element was absent from the orthologous sites of all non-human primate genomes (
                    <xref ref-type="fig" rid="f7">Figure 7A</xref>), confirming it as an authentic human-specific retro-DNA. On the contrary, the example shown in 
                    <xref ref-type="fig" rid="f2">Figure 2B</xref> is demonstrated to be a retro-DNA insertion event shared among three of the four monkey species, thus likely a lineage-specific retro-DNA. As shown in 
                    <xref ref-type="fig" rid="f7">Figure 7B</xref>, this 446 bp 
                    <italic toggle="yes">Charlie1a</italic> fragment was absent in the orthologous regions of the remaining seven primate genomes. Furthermore, it appears that the retro-DNA sequence in these three genomes had been subject to mutation in the polyA tails shown as having variable lengths, agreeing with its relatively older age as a lineage-specific retro-DNA. Similarly, the example shown in 
                    <xref ref-type="fig" rid="f2">Figure 2D</xref> represents an ape lineage-specific retro-DNA, given its presence in all ape genomes and absence from all non-ape genomes examined.</p>
                <fig fig-type="figure" id="f6" orientation="portrait" position="float">
                    <label>Figure 6. </label>
                    <caption>
                        <title>The evolutionary timeline of the retro-DNA insertions during the evolution of the ten primate genomes.</title>
                        <p>A. A rooted phylogenetic tree of the ten primate genomes from the TimeTree database (
                            <ext-link ext-link-type="uri" xlink:href="http://www.timetree.org/">http://www.timetree.org/</ext-link>). The numeric values below each branch represent the number of retro-DNA insertion events that happened during the corresponding period of primate evolution. The numeric value above each branch represents the millions of years (Mya) for that branch. The evolutionary time for marmoset has been manually corrected from 21.58 MY to 51.02 MY for the correlation analysis in panel B. The table insert below the tree shows the distribution of the retro-DNAs by the degree of conservation among the genomes as measured by the number of genomes owning a retro-DNA. B. A scatter plot between the number of retro-DNA insertion events and their evolutionary age based on the data in panel A. The trend line shows that the number of retro-DNA insertion events is positively correlated with the relative evolutionary distance (R
                            <sup>2</sup> = 0.919).</p>
                    </caption>
                    <graphic id="gr6" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/142770/d3f2af48-2278-4674-87e2-662a8a23b2f0_figure6.gif"/>
                </fig>
                <p>As shown in 
                    <xref ref-type="fig" rid="f6">Figure 6B</xref>, the number of retro-DNA insertional events appears to show a positive linear correlation with the relative evolutionary ages of the species and lineages (R
                    <sup>2</sup> = 0.5463), suggesting that these retro-DNA insertional events occurred at a low but relatively consistent rate during primate evolution.</p>
            </sec>
            <sec id="sec6">
                <title>The genome distribution patterns of retro-DNAs and their parent sites in gene context and expression</title>
                <p>To assess the potential functional impact of these retro-DNAs, we examined their gene context based on the Ensembl gene annotation for these genomes.
                    <sup>
                        <xref ref-type="bibr" rid="ref42">42</xref>
                    </sup> A total of 698 retro-DNAs, representing ~40% of the 1,750 retro-DNAs, were located within the genic regions and promoter regions for 734 transcripts from 414 unique genes (
                    <xref ref-type="table" rid="T4">Table 4</xref> and Table S3). The majority of these retro-DNAs were located within the intron regions (699/734 transcripts), while 27 entries were inserted into promoter regions and untranslated regions. The presence of these retro-DNAs in the genic regions provides the potential to impact gene regulation or splicing.</p>
                <table-wrap id="T4" orientation="portrait" position="float">
                    <label>Table 4. </label>
                    <caption>
                        <title>The numbers of retro-DNAs located in the genic regions in the 10 primate genomes.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">Genic region
                                    <xref ref-type="table-fn" rid="tfn2">
                                        <sup>*</sup>
                                    </xref>
                                </th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Human</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Chimpanzee</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Gorilla</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Orangutan</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Gibbon</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Crab-eating macaque</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Rhesus</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Baboon</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Green monkey</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Marmoset</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Total</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">NR</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">4</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">1</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">1</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">1</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">1</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">8</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Promoter</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">9</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">5</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">2</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">1</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">1</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">1</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">3</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">22</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">5&#x2032; UTR</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">1</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">1</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">2</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">3&#x2032; UTR</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">1</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">2</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">3</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Intron</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">114</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">78</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">70</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">60</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">61</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">62</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">67</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">42</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">53</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">92</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">699</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>Total</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>128</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>85</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>73</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>62</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>61</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>64</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>69</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>42</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>53</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>97</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>734</bold>
                                </td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>Total (nr)</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>109</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>82</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>70</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>60</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>61</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>63</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>67</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>42</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>53</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>91</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>698</bold>
                                </td>
                            </tr>
                        </tbody>
                    </table>
                    <table-wrap-foot>
                        <fn-group content-type="footnotes">
                            <fn id="tfn2">
                                <label>
                                    <sup>*</sup>
                                </label>
                                <p>NR: non-coding RNA; UTR: untranslated region</p>
                            </fn>
                        </fn-group>
                    </table-wrap-foot>
                </table-wrap>
                <fig fig-type="figure" id="f7" orientation="portrait" position="float">
                    <label>Figure 7. </label>
                    <caption>
                        <title>Multiple sequence alignment and phylogenetic analysis of retro-DNAs.</title>
                        <p>A. Multiple sequence alignment for a retro-DNA located in the human genome (hg38_chr4:146335052-146335253, the same entry in 
                            <xref ref-type="fig" rid="f1">Figure 1A</xref>) and the corresponding pre-integration sequences from the other eight primate genomes. The pre-integration sequence from the marmoset genome is unavailable, likely due to the high level of sequence divergence. B. Multiple sequence alignment for the sequences of a retro-DNA shared among green monkey, crab-eating macaque and rhesus genomes (chlSab2_chr8:30005081-30005527, macFas5_chr8:32527581-32528029, and rheMac8_chr8:31992158-31992606) with the flanking sequences, along with their orthologous pre-integration sequences from 7 other primate genomes. The red highlights indicate possible polyA tails with variable lengths across genomes, while the yellow highlights show the observed target site duplications (TSDs).</p>
                    </caption>
                    <graphic id="gr7" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/142770/d3f2af48-2278-4674-87e2-662a8a23b2f0_figure7.gif"/>
                </fig>
                <p>Further, we identified the potential parent sites for these retro-DNAs by performing a sequence similarity search using their sequences to query the corresponding genome sequences. For each retro-DNA, the best non-self-match was selected as its potential parent site. An example of such a parent-child relationship is shown in 
                    <xref ref-type="fig" rid="f8">Figure 8</xref>, in which a human-specific new retro-DNA event on chromosome 4 is shown to be a child to a much longer Tigger7 (1882 bp) on chromosome 9, which has orthologous copies in other primate genomes, indicating a much older age of the latter and its validity as a parent copy for the former. As shown in Table S4, we identified a total of 715 potential parent sites for the 1,750 retro-DNA entries (or 325 entries for the 748 retro-DNAs after removing the redundancy across species). The failure in finding the parent copies for the remaining entries could be due to the loss of the parent copy as a result of genomic rearrangements or due to incomplete coverage of the genome sequences. As for the retro-DNAs, we examined the gene context for these potential parent sites, and as shown in Table S5, 351 (49.1%) of these redundant potential retro-DNA parent sites were located in 410 different genic regions for 371 unique genes/transcripts. In these cases, the transcripts of these potential parent sites, likely as part of the transcripts or splicing side-products (
                    <italic toggle="yes">e.g.</italic>, excised intron sequences) of their host genes, might have had the chance to be captured by the L1 TPRT machinery to generate retro-DNAs as in the case of processed pseudogenes/retro-genes. The ratio of genic entries (49.1%) was higher for the parent sites than that for retro-DNAs (~40%), and the implication is discussed later.</p>
                <fig fig-type="figure" id="f8" orientation="portrait" position="float">
                    <label>Figure 8. </label>
                    <caption>
                        <title>Sequence alignment and phylogenetic analysis of a human retro-DNA, its parent copy in the same genome, and its orthologous copies in other genomes.</title>
                        <p>A. Multiple sequence alignment for a retro-DNA in the human genome (hg38_chr4:146335052-146335253) and its parent copy (hg38_chr9:70197633-70197828, limited to the sequence aligned with the retro-DNA) plus the orthologous sequences of the parent copy from the other 9 non-human primate genomes. The red arrow indicates the retro-DNA entry, while the blue arrow indicates the parent copy. SNPs in red vertical boxes are seen among members of the 
                            <italic toggle="yes">Hominidae</italic> group. B. Phylogenetic analysis of the 11 nucleotide sequences from the 10 primate genomes shown in A using the Maximum Likelihood method and Tamura-Nei model.
                            <sup>
                                <xref ref-type="bibr" rid="ref60">60</xref>
                            </sup> The bootstrapped consensus tree inferred from 500 replicates
                            <sup>
                                <xref ref-type="bibr" rid="ref61">61</xref>
                            </sup> is used to represent the evolutionary history of the taxa involved. Branches corresponding to partitions reproduced in less than 50% of bootstrap replicates were collapsed. The percentages of replicate trees in which the associated taxa clustered together in the bootstrap test (500 replicates) are shown next to the branches.
                            <sup>
                                <xref ref-type="bibr" rid="ref61">61</xref>
                            </sup> Initial tree(s) for the heuristic search were obtained automatically by applying Neighbor-Joining and BioNJ algorithms to a matrix of pairwise distances estimated using the Maximum Composite Likelihood (MCL) approach followed by selecting the topology with superior likelihood value in logarithmic scale. This analysis involved 11 nucleotide sequences with a total of 222 positions in the final dataset.</p>
                    </caption>
                    <graphic id="gr8" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/142770/d3f2af48-2278-4674-87e2-662a8a23b2f0_figure8.gif"/>
                </fig>
                <p>We also examined the expression level of retro-DNAs and their potential parent sites using RNA-seq data from the Non-Human Primate Reference TRanscriptome (NHPRTR) dataset
                    <sup>
                        <xref ref-type="bibr" rid="ref44">44</xref>
                    </sup> and two other studies
                    <sup>
                        <xref ref-type="bibr" rid="ref45">45</xref>
                    </sup>
                    <sup>,</sup>
                    <sup>
                        <xref ref-type="bibr" rid="ref46">46</xref>
                    </sup> to see if any of these entries had any transcriptional activity in the present-day primate genomes. For this, we collected a total of 21 transcriptomes for seven primates, excluding orangutan, gibbon, and marmoset, for which no transcriptome data was available at the time of our analysis. To minimize false positives due to the high sequence similarity among ME members in the same family, we included only the reads with a perfect match to the retro-DNAs or their parent site regions and with each read used only once in calculating the expression level. However, we believe that this process has inevitably introduced a certain level of false negatives in the results due to sequence polymorphisms and, therefore, may have led to an underestimation of the retro-DNAs and parent sites&#x2019; expression levels. As seen in 
                    <xref ref-type="table" rid="T5">Tables 5</xref> and S6, 966 loci from the 1,233 retro-DNAs and 540 parent sites in these seven primate genomes were shown to have a certain level of expression, with fragments per kilobase of transcript per million reads (fpkm) values ranging from 0.0003 to 27.3.</p>
                <table-wrap id="T5" orientation="portrait" position="float">
                    <label>Table 5. </label>
                    <caption>
                        <title>The numbers of expressed retro-DNAs and parent sites in 21 primate transcriptomes.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="2" valign="top">Species</th>
                                <th align="left" colspan="1" rowspan="2" valign="top"># of RNA-seq sets</th>
                                <th align="left" colspan="3" rowspan="1" valign="top">retro-DNAs</th>
                                <th align="left" colspan="3" rowspan="1" valign="top">parent sites</th>
                            </tr>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top"># of entries</th>
                                <th align="left" colspan="1" rowspan="1" valign="top"># of expressed</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">%</th>
                                <th align="left" colspan="1" rowspan="1" valign="top"># of entries</th>
                                <th align="left" colspan="1" rowspan="1" valign="top"># of expressed</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">%</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Human</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">6</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">187</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">93</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">49.7</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">98</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">57</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">58.2</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Chimpanzee</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">2</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">202</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">99</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">49.0</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">101</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">67</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">66.3</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Gorilla</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">1</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">191</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">55</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">28.8</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">99</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">42</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">42.4</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Rhesus</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">4</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">177</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">97</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">54.8</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">64</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">46</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">71.9</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Crab-eating macaque</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">4</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">163</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">115</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">70.6</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">63</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">55</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">87.3</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Baboon</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">2</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">143</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">68</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">47.6</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">53</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">34</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">64.2</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Green monkey</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">2</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">170</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">90</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">52.9</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">62</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">48</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">77.4</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Total</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">21</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">1233</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">617</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">50.0</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">540</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">349</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">64.6</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
                <p>We further investigated the relationship between retro-DNAs and their parent sites based on their expression levels. Specifically, three human testis transcriptome samples (SRR2040581, SRR2040582, SRR2040583) retrieved from the NCBI SRA (
                    <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/sra">Sequence Read Archive</ext-link>) were used to analyze the expression level of the retro-DNA/parent site pairs. As shown in 
                    <xref ref-type="fig" rid="f9">Figure 9A</xref>, a total of 66 retro-DNA/parent site pairs were shown to have a certain level of expression (fpkm &gt; 0) for either the retro-DNA or the parent site among the three human testis samples. Notably, among these 66 retro-DNA/parent site pairs, 57 (86.4%) parent sites were shown to be expressed (fpkm &gt; 0) compared to only 42 (63.6%) expressed retro-DNAs (Tables S4 and S6, 
                    <xref ref-type="fig" rid="f9">Figure 9A</xref>). This difference might indicate that the generation of a retro-DNA requires the expression of its parent site, while a retro-DNA itself may or may not be expressed, depending on its landing location. Therefore, a higher ratio of transcriptionally active sites can be expected for the parent sites than for the progenies (retro-DNAs). More interestingly, the two parent sites responsible for multiple retro-DNA entries were shown to have the highest levels of expression among the parent sites (
                    <xref ref-type="fig" rid="f9">Figure 9A</xref>). This may suggest that the expression level of the parent sites is positively correlated to their potential in generating retro-DNAs. Furthermore, the ongoing expression of the parent sites suggests that they have the potential to generate more retro-DNAs in the future.</p>
                <fig fig-type="figure" id="f9" orientation="portrait" position="float">
                    <label>Figure 9. </label>
                    <caption>
                        <title>The expression level of retro-DNAs and their parent sites in three human testis transcriptomes.</title>
                        <p>A. A scatter plot based on 66 retro-DNA/parent site pairs which show a certain level of expression (fpkm &gt; 0) for the retro-DNA and/or parent site. The two data points in red with the same value for the parent but different values for the retro-DNA copies point to the same parent copy in human at hg38:chr5:1570263-1570333, and the two data points in blue point to the same parent copy in human at chr5:259441-262665. B. Box plots showing the expression levels of the 66 retro-DNAs and parent sites divided into genic and intergenic groups. Expression data was based on the average fpkm value in the three human testis transcriptomes.</p>
                    </caption>
                    <graphic id="gr9" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/142770/d3f2af48-2278-4674-87e2-662a8a23b2f0_figure9.gif"/>
                </fig>
                <p>We also examined and compared the expression levels of retro-DNAs and their parent sites among gene context-based groups in the three human testis transcriptomes. As shown in 
                    <xref ref-type="fig" rid="f9">Figure 9B</xref>, the average fpkm values of the parent sites were always higher than those of the retro-DNA entries, either as a whole group or when divided into genic and intergenic regions. In addition, the entries located within genic regions showed higher expression than the ones located outside the genic regions for both retro-DNAs and the parent sites (
                    <xref ref-type="fig" rid="f9">Figure 9B</xref>), suggesting that entries located in the genic regions may have more opportunities to be expressed passively as part of the host gene expression. This difference is larger for retro-DNAs than for the parent sites, likely because parent sites had to be expressed regardless of their position in order to be able to generate new copies. None of these differences are statistically significant, likely due to the small sample size.</p>
            </sec>
        </sec>
        <sec id="sec7" sec-type="discussion">
            <title>Discussion</title>
            <sec id="sec8">
                <title>Retro-DNAs as a new type of retrotransposons derived from DNA transposons</title>
                <p>In this study, we focused on a small number of species-specific DNA transposons identified in primate genomes using a computational comparative genomics pipeline previously established for analyzing species-specific retrotransposons in the human genome and seven other genomes.
                    <sup>
                        <xref ref-type="bibr" rid="ref3">3</xref>
                    </sup>
                    <sup>,</sup>
                    <sup>
                        <xref ref-type="bibr" rid="ref36">36</xref>
                    </sup> Unlike for retrotransposons, for which the ongoing activity during evolution and in the current genomes of primates, as well as their contribution to the lineage- and species-specific MEs, have been well established,
                    <sup>
                        <xref ref-type="bibr" rid="ref3">3</xref>
                    </sup>
                    <sup>,</sup>
                    <sup>
                        <xref ref-type="bibr" rid="ref32">32</xref>
                    </sup>
                    <sup>,</sup>
                    <sup>
                        <xref ref-type="bibr" rid="ref47">47</xref>
                    </sup> similar research for DNA transposons in primate genomes remains very scarce. As a matter of fact, as of the time of writing, no report of species-specific DNA transposons in these primate genomes has been documented, likely due to lack of effort, as DNA transposons are thought to have become inactive in primate genomes about 37 Mya.
                    <sup>
                        <xref ref-type="bibr" rid="ref17">17</xref>
                    </sup>
                    <sup>,</sup>
                    <sup>
                        <xref ref-type="bibr" rid="ref19">19</xref>
                    </sup>
                </p>
                <p>In trying to understand the mechanism underlying the mysterious species-specific DNA transposon insertions identified in our comparative genome analysis, we spotted a few interesting entries as exemplified by the case shown in 
                    <xref ref-type="fig" rid="f2">Figure 2A</xref>, which manifests the characteristics of non-LTR retrotransposons by having longer TSDs and presence of a polyA tail, while lacking TIRs, the hallmark of new DNA transposon insertions. The remaining cases shown in 
                    <xref ref-type="fig" rid="f2">Figure 2</xref> have the same non-LTR features but do not necessarily have a typical polyA tail. For their non-LTR retrotransposon characteristics, we named them &#x201c;retro-DNA&#x201d; as retrotransposons derived from DNA transposons. We then performed a systematic analysis to look for more of such &#x201c;retro-DNA&#x201d; cases.</p>
                <p>For this, we expanded our search from the strict species-specific DNA transposons, which are defined as those present in only one of the primate genomes,
                    <sup>
                        <xref ref-type="bibr" rid="ref3">3</xref>
                    </sup>
                    <sup>,</sup>
                    <sup>
                        <xref ref-type="bibr" rid="ref36">36</xref>
                    </sup> to da-DNAs, which are defined as diallelic DNA transposons with the insertion allele and its pre-integration allele (
                    <italic toggle="yes">i.e.</italic>, the orthologous region without the DNA transposon) both present in at least one of the ten genomes we included. We obtained a total of 271,085 da-DNAs, and from these we then specifically searched for retro-DNA cases, which have long TSDs (&#x2265;8bp) and lack TIRs, using the protocol shown in 
                    <xref ref-type="fig" rid="f3">Figure 3</xref>. This led to the identification of 1,750 retro-DNA cases, which represent 748 unique events, covering all ten primate genomes with over half being species-specific and the remainder being lineage-specific covering different lineages in this group of primates (
                    <xref ref-type="fig" rid="f6">Figure 6A</xref>). Our results indicate that the generation of retro-DNAs has occurred in all ten primate genomes included in our analysis, across a wide spectrum of evolutionary time and at an approximately constant rate (
                    <xref ref-type="fig" rid="f6">Figure 6</xref>). Furthermore, these retro-DNAs are not limited to a single subfamily, but rather cover all major DNA transposon families, suggesting that the existence of such &#x201c;retro-DNAs&#x201d; is the product of a consistent and common process acting in primate evolution.</p>
            </sec>
            <sec id="sec9">
                <title>The likely mechanism underlying the generation of retro-DNAs</title>
                <p>Several lines of evidence from our results guided us to propose that these retro-DNAs were the products of the L1-based TPRT machinery, similar to the known non-autonomous non-LTR retrotransposons, 
                    <italic toggle="yes">i.e.</italic>, SINEs, SVAs and processed pseudogenes.
                    <sup>
                        <xref ref-type="bibr" rid="ref9">9</xref>
                    </sup>
                    <sup>,</sup>
                    <sup>
                        <xref ref-type="bibr" rid="ref33">33</xref>
                    </sup>
                    <sup>&#x2013;</sup>
                    <sup>
                        <xref ref-type="bibr" rid="ref36">36</xref>
                    </sup> The major pieces of evidence include the lack of TIRs and the presence of the TPRT insertion site sequence motif and long TSDs. As seen in 
                    <xref ref-type="fig" rid="f5">Figure 5A</xref>, the integration sites of the 748 retro-DNAs display, although with a much weaker signal, a core sequence motif of &#x201c;TT/AAAA&#x201d;, which is identical to that for non-LTR retrotransposons in the human genome (
                    <xref ref-type="fig" rid="f5">Figure 5B</xref>).
                    <sup>
                        <xref ref-type="bibr" rid="ref34">34</xref>
                    </sup>
                    <sup>,</sup>
                    <sup>
                        <xref ref-type="bibr" rid="ref36">36</xref>
                    </sup>
                    <sup>,</sup>
                    <sup>
                        <xref ref-type="bibr" rid="ref41">41</xref>
                    </sup> The TSDs for these retro-DNAs show a dominant peak at 8bp (
                    <xref ref-type="fig" rid="f5">Figure 5C</xref>), which is much longer than that of TSDs typically found for DNA transposons (2 bp) and is similar to the secondary peak of TSD length observed for the human-specific L1s
                    <sup>
                        <xref ref-type="bibr" rid="ref36">36</xref>
                    </sup> (
                    <xref ref-type="fig" rid="f5">Figure 5D</xref>). Furthermore, the presence of parent sites in the same genome for a significant proportion of the retro-DNAs (325/748 or 43.5%) indicates their use of a &#x201c;copy-and-paste&#x201d; rather than the &#x201c;cut-and-paste&#x201d; mechanism used by canonical DNA transposons. The presence of a polyA tail in many (176/748 or 23.5%) of these retro-DNAs provides additional support for their use of the L1-based TPRT mechanism.</p>
                <p>It is worth noting that, as described above, while there is sufficient similarity in sequence features between these retro-DNAs and the known non-LTR retrotransposons for treating these retro-DNAs as a new type of non-LTR retrotransposons, unique aspects of these retro-DNAs are also evident. These include the absence of the major TSD length peak at 15 bp observed for other non-LTR retrotransposons, the low percentage of entries with a polyA tail, and the weaker signal of the sequence motif, &#x201c;TT/AAAA&#x201d;, at the integration sites. All of these unique characteristics might be attributed to the relatively older average age of these retro-DNAs as indicated by the relatively high percentage (298/748 or ~40%) that are lineage-specific (
                    <xref ref-type="fig" rid="f6">Figure 6A</xref>) compared to the non-LTR retrotransposons used in most previous studies for analysis of integration site sequence motifs.
                    <sup>
                        <xref ref-type="bibr" rid="ref9">9</xref>
                    </sup>
                    <sup>,</sup>
                    <sup>
                        <xref ref-type="bibr" rid="ref33">33</xref>
                    </sup>
                    <sup>&#x2013;</sup>
                    <sup>
                        <xref ref-type="bibr" rid="ref36">36</xref>
                    </sup> In other words, the older age of the retro-DNAs leads to higher sequence divergence, which in turn lowers the sensitivity for detecting all of these sequence features. An additional reason for the weaker signal in the insertion site sequence motif for the retro-DNAs could be the small sample size. It is also possible that these unique characteristics suggest that some differences in the detailed retrotransposition process of these DNA transposons, likely regarding the interaction between the retro-DNA transcripts and the ORF1 and ORF2 proteins, may exist between the retro-DNAs and the canonical non-LTR retrotransposons. One known example of this is that Alu transposition does not seem to require ORF1p.
                    <sup>
                        <xref ref-type="bibr" rid="ref32">32</xref>
                    </sup>
                    <sup>,</sup>
                    <sup>
                        <xref ref-type="bibr" rid="ref48">48</xref>
                    </sup>
                    <sup>,</sup>
                    <sup>
                        <xref ref-type="bibr" rid="ref49">49</xref>
                    </sup>
                </p>
                <p>It is also worth pointing out that in addition to the well-known types of non-autonomous non-LTRs transposed by the TPRT machinery, including SINEs, SVAs, and retro-genes, evidence suggests that some copies of the LTR-retrotransposon subfamily, HERV-W, might have also been transposed by this mechanism.
                    <sup>
                        <xref ref-type="bibr" rid="ref50">50</xref>
                    </sup>
                    <sup>,</sup>
                    <sup>
                        <xref ref-type="bibr" rid="ref51">51</xref>
                    </sup> However, these HERV-W sequences are part of retrotransposons and can continue to be transposed using their canonical retrotransposition mechanism. For this reason, we would like to argue that our identification of retro-DNAs is unique and significant in the sense that they represent DNA transposons, which would not be able to transpose anymore in the primate genomes, since their canonical mechanism is no longer active. Overall, the research from this study and others clearly suggests that the L1-based TPRT machinery may be able to transpose a much wider variety of genomic sequences than is currently known.</p>
            </sec>
            <sec id="sec10">
                <title>The relative retro-DNA activity during primate evolution</title>
                <p>In comparison with the other types of non-autonomous non-LTR retrotransposons, including Alus, SVAs, and processed pseudogenes, in primate genomes,
                    <sup>
                        <xref ref-type="bibr" rid="ref2">2</xref>
                    </sup>
                    <sup>,</sup>
                    <sup>
                        <xref ref-type="bibr" rid="ref3">3</xref>
                    </sup>
                    <sup>,</sup>
                    <sup>
                        <xref ref-type="bibr" rid="ref32">32</xref>
                    </sup>
                    <sup>,</sup>
                    <sup>
                        <xref ref-type="bibr" rid="ref52">52</xref>
                    </sup> the number of retro-DNAs per genome was much lower, averaging at &lt; 200 per genome (Table S2). This number was even substantially lower than that of processed pseudogenes, which represent the smallest class of non-LTR retrotransposons with 10,190 copies in the human genome.
                    <sup>
                        <xref ref-type="bibr" rid="ref53">53</xref>
                    </sup> We reason that the very small copy number of retro-DNAs may be primarily attributed to one factor, 
                    <italic toggle="yes">i.e.</italic>, the lack of intrinsic internal promoters to drive their own transcription, leading to an overall low level of their transcripts available for retrotransposition. Retrotransposons carry their intrinsic promoters required for their canonical propagation mechanisms, while a promoter is not required for the canonical DNA transposon activity. This is in agreement with the observation that there is no clear hotspot in the DNA transposon consensus sequences used in generating retro-DNAs, as shown in 
                    <xref ref-type="fig" rid="f4">Figure 4</xref> for Tigger1. Should there be internal promoters driving the transcription, we would expect to observe one or more clear dominant peaks in the frequency of the regions used for retro-DNAs correlated with the location of the internal promoter(s). Without the ability to drive their own transcription, the only way for DNA transposons to get transcribed is to get transcribed as a part of the host gene transcripts. If this is how retro-DNAs were generated, then we would expect to see a high percentage of retro-DNAs having their parent sites located in the genic regions, more specifically in the transcribed regions, 
                    <italic toggle="yes">i.e.</italic>, exon and intron regions. By examining the gene context, we found that 351 of the 715 parent sites (49.1%) for the retro-DNAs were located in 371 unique genes/transcripts in the ten primate genomes. This ratio was higher than both that for all DNA transposons in the genic regions (39%, detailed data not shown), taken as the expectation for a random distribution, and that for the retro-DNAs (40% in genic sites including promoters) (
                    <xref ref-type="table" rid="T4">Tables 4</xref> and S5), thus supporting the role of passive expression for the parent sites in generating these retro-DNAs.</p>
                <p>By the same rationale, we would expect that on average the parent sites should have a higher expression level than retro-DNAs since the parent sites were selected to be biased for this by being located in the genic regions, while the location of the retro-DNAs is more or less random, leading to a relatively lower proportion in genic regions than the parent sites as shown in our data (40% versus 49%) (
                    <xref ref-type="table" rid="T4">Table 4</xref>, Table S5). This is supported by the expression data showing that among the 66 retro-DNA/parent site pairs, 57 pairs have parent sites with a fpkm &gt; 0 compared to only 42 expressed entries for retro-DNAs (
                    <xref ref-type="fig" rid="f9">Figure 9A</xref>). Additionally, we identified two parent sites, which are the only sites potentially responsible for generating multiple retro-DNA entries, and they showed the highest levels of expression among the parent sites (
                    <xref ref-type="fig" rid="f9">Figure 9A</xref>). By comparing the expression levels of all parent sites with that of retro-DNAs in the human genome, we can see an overall higher expression for the parent sites (
                    <xref ref-type="fig" rid="f9">Figure 9B</xref>), and this is also true when comparing between the sites in the genic and intergenic regions (
                    <xref ref-type="fig" rid="f9">Figure 9B</xref>). Furthermore, the expression level of parent sites in the genic regions is much higher than their counterparts in the intergenic regions as expected (
                    <xref ref-type="fig" rid="f9">Figure 9B</xref>). Another possible factor leading to the extremely small number of retro-DNAs might be that the sequences of these DNA transposons are much less optimal for TPRT-based retrotransposition than the canonical types of retrotransposons.</p>
                <p>The use of the 10 primate genomes, representing several lineages with a large span in primate evolution, allowed us to examine whether there is any positive correlation between the length of evolutionary span and the number of retro-DNA insertional events. As shown in 
                    <xref ref-type="fig" rid="f6">Figure 6B</xref>, a moderate positive correlation between the two is observed (R
                    <sup>2</sup> = 0.5463), suggesting that the generation of retro-DNAs is relatively steady during the evolution of this group of primates. Furthermore, the observation that many of the retro-DNA parent sites, as well as 966 of the 1773 (~54.5%) retro-DNAs show certain levels of expression in the seven primate transcriptomes (
                    <xref ref-type="table" rid="T5">Table 5</xref> and S6), suggests the possibility of ongoing retro-DNA generation from the parent sites and perhaps also from some retro-DNAs.</p>
            </sec>
            <sec id="sec11">
                <title>Conclusions and future perspectives</title>
                <p>In this study, through a comparative genomic analysis of 10 primates, we report the first identification of a new type of non-autonomous non-LTR retrotransposons derived from DNA transposon sequences. Named as &#x201c;retro-DNAs&#x201d;, these elements represent an additional type of non-LTR retrotransposons after LINE, SINE, SVA, and processed pseudogene, very likely using the same L1-based TPRT mechanism. This work is significant, as the generation of these retro-DNAs serves to propagate DNA transposon sequences in the absence of the canonical DNA transposon activity in primate genomes and the process involves two fundamentally different ME classes. Despite being very small in number, they do contribute to the genetic diversity among primate species along with other MEs. Furthermore, the discovery of these retro-DNAs suggests that the L1-based TPRT machinery may have been used by more diverse types of RNA transcripts than what we currently know. Interesting follow-up work ought to include the verification of the retrotransposition activity of these retro-DNAs and their parent sites using 
                    <italic toggle="yes">in vitro</italic> and 
                    <italic toggle="yes">in vivo</italic> assays and extension of similar analyses to other types of expressed DNA sequences, such as non-coding RNA genes. In addition, research into the mechanisms underlying the remaining majority of the diallelic DNA transposons would also be very interesting and valuable.</p>
            </sec>
        </sec>
        <sec id="sec12" sec-type="methods">
            <title>Methods</title>
            <sec id="sec13">
                <title>Sources of primate genome sequences</title>
                <p>In this study, we chose to use 10 primate genomes including human, among which eight genomes were included in our previous study for identifying species-specific MEs in primates.
                    <sup>
                        <xref ref-type="bibr" rid="ref3">3</xref>
                    </sup> These 10 primate species include human (GRCh38/UCSC hg38), chimpanzee (May 2016, CSAC Pan_troglodytes-3.0/panTro5), gorilla (Dec 2014, NCBI project 31265/gorGor4.1), orangutan (July 2007, WUSTL version Pongo_albelii-2.0.2/ponAbe2), gibbon (Oct. 2012 GGSC Nleu3.0/nomLeu3.0), green monkey (Mar. 2014 VGC Chlorocebus_sabeus-1.1/chlSab2), crab-eating macaque (Jun. 2013 WashU Macaca_fascicularis_5.0/macFas5), rhesus monkey (November 2015 BCM Mmul_8.0.1/rheMac8), baboon (Anubis) (March 2012 Baylor Panu_2.0/papAnu2), and marmoset (March 2009 WUGSC 3.2/calJac3). The marmoset genome was added to expand the evolutionary span, also serving as an outgroup for the other nine genomes from the ape and monkey groups, while the gibbon genome was added to increase the coverage and evolutionary span of the ape group. All genome sequences in fasta format and the RepeatMasker annotation files were downloaded from the 
                    <ext-link ext-link-type="uri" xlink:href="http://genome.ucsc.edu">UCSC genome website</ext-link> onto our local high performance computing servers for in-house analyses. We have used the most recent genome versions available on the UCSC genome browser website at the time of analysis in all cases except for gorilla, for which there is a newer version (March 2016, GSMRT3/gorGor5) available but not scaffolded into chromosomes, making it inadequate for our analysis.</p>
            </sec>
            <sec id="sec14">
                <title>LiftOver overchain file generation</title>
                <p>A total of 90 liftOver chain files were needed for all possible pair-wise comparisons of the 10 genomes used in this study. These files contain the information linking the orthologous positions in a pair of genomes based on lastZ alignment.
                    <sup>
                        <xref ref-type="bibr" rid="ref54">54</xref>
                    </sup> A total of 22 of these were available and downloaded from the 
                    <ext-link ext-link-type="uri" xlink:href="http://genome.ucsc.edu">UCSC genome website</ext-link>, and another 34 liftOver chain files were generated using a modified version of 
                    <ext-link ext-link-type="uri" xlink:href="http://genome.ucsc.edu">UCSC</ext-link> pipeline RunLastzChain from a previous study.
                    <sup>
                        <xref ref-type="bibr" rid="ref3">3</xref>
                    </sup> The remaining 36 liftOver chain files were newly generated for this study using the same pipeline.</p>
            </sec>
            <sec id="sec15">
                <title>Identification of DNA transposons with diallelic status in the ten primate genomes</title>
                <p>
                    <bold>Pre-processing of DNA transposons:</bold> The starting list of DNA transposons in each primate genome was obtained based on the RepeatMasker ME annotation data from the 
                    <ext-link ext-link-type="uri" xlink:href="https://genome.ucsc.edu">UCSC website</ext-link>. As previously described, we performed a pre-processing to integrate the ME fragments annotated by RepeatMasker back to ME sequences representing the original transposition events.
                    <sup>
                        <xref ref-type="bibr" rid="ref36">36</xref>
                    </sup>
                </p>
                <p>
                    <bold>Identification of DNA transposons with diallelic status:</bold> We modified a previously reported comparative genomics bioinformatics pipeline
                    <sup>
                        <xref ref-type="bibr" rid="ref36">36</xref>
                    </sup> to identify da-DNAs that have the presence of both the insertion and pre-integration alleles in at least one of the 10 primate genomes. Briefly, this pipeline uses a robust multi-way computational comparative genomic approach to determine the presence/absence status of DNA transposons among a group of genomes by using both the whole chromosome alignment-based liftOver tool and the local sequence alignment-based BLAT tool.
                    <sup>
                        <xref ref-type="bibr" rid="ref55">55</xref>
                    </sup>
                    <sup>,</sup>
                    <sup>
                        <xref ref-type="bibr" rid="ref56">56</xref>
                    </sup> The sequence of a DNA transposon at the insertion site and its two flanking regions in a genome were compared to the sequences of the orthologous regions available in all other genomes. If a DNA transposon is absent from the orthologous regions of any of the other nine genomes not due to the existence of a sequence gap (
                    <italic toggle="yes">i.e.</italic> just missing the insertion), it is selected as a potential candidate of da-DNA subject to further analyses.</p>
            </sec>
            <sec id="sec16">
                <title>Identification of retro-DNAs</title>
                <p>
                    <bold>Identification of TSDs and TIRs:</bold> For the candidate entries from the previous step, using in-house PERL scripts as described previously,
                    <sup>
                        <xref ref-type="bibr" rid="ref36">36</xref>
                    </sup> we performed identification of the TSDs. Additionally, we modified our scripts to identify the TIRs, the hallmark of all cut-and-paste transposons except for 
                    <italic toggle="yes">Helitrons.</italic>
                    <sup>
                        <xref ref-type="bibr" rid="ref17">17</xref>
                    </sup> da-DNA entries without identifiable TSDs or TSD length &lt; 8 bp, as well as entries with identifiable TIRs, were excluded from further analysis. The 8 bp TSD length cutoff was chosen based on our observation for human-specific retrotransposons that 95% of identified TSDs are at least 8 bp long.
                    <sup>
                        <xref ref-type="bibr" rid="ref36">36</xref>
                    </sup> Additionally, we used MiteFinderII, a tool designed to identify miniature inverted-repeat transposable elements,
                    <sup>
                        <xref ref-type="bibr" rid="ref57">57</xref>
                    </sup> to verify that none of our candidate entries contain TIRs.</p>
                <p>
                    <bold>Filtering against retrotransposon transductions:</bold> To ensure the presence of a DNA transposon was a result of active transposition, rather than a passive result of other processes, 
                    <italic toggle="yes">e.g.</italic>, retrotransposition-mediated transductions, we mapped the candidate entries against the known retrotransposons in the ten primate genomes based on their genomic positions. Specifically, the sequences of candidates from the previous step were mapped back onto the host genome using BLAT, followed by removing all entries located within 50 bp of a retrotransposon (excluding entries inserted into a retrotransposon), because such entries could be a result of retrotransposition-mediated transduction. All entries left at this point were considered candidates of &#x201c;retro-DNAs&#x201d; for being retrotransposons derived from DNA transposon sequences but lacking TIRs and having TSDs of 8 bp or longer.</p>
                <p>
                    <bold>Identification of polyA tail:</bold> For each candidate retro-DNA, we retrieved the 10 bp sequence from the 3&#x2019; end of the positive-strand (by the DNA transposon consensus sequence). If the sequence contains six or more &#x201c;A&#x201d;, the entry is considered to have a polyA tail.</p>
            </sec>
            <sec id="sec17">
                <title>Clustering retro-DNAs to identify unique retro-DNA events</title>
                <p>The retro-DNA candidates identified from the last step in the 10 primate genomes were subject to a round of &#x201c;all-against-all&#x201d; sequence similarity search using BLAT with the sequences of the retro-DNAs plus the 100 bp of the flanking region on each side. Entries with 95% or higher sequence similarity across the entirety of the sequences including the flanking sequences were identified as one orthologous cluster, representing one retro-DNA insertion event during the evolution of these primates.</p>
            </sec>
            <sec id="sec18">
                <title>Estimating the timeline for retro-DNA insertions</title>
                <p>An organismal phylogenetic tree of the 10 primate genomes with the marmoset genome as the outgroup was obtained from the 
                    <ext-link ext-link-type="uri" xlink:href="http://www.timetree.org">TimeTree database</ext-link>
                    <sup>
                        <xref ref-type="bibr" rid="ref43">43</xref>
                    </sup> and displayed using the Treeview program.
                    <sup>
                        <xref ref-type="bibr" rid="ref58">58</xref>
                    </sup> We then manually added the numbers of non-redundant retro-DNA entries onto the nodes and branches of this tree based on the presence of retro-DNAs in the specific genomes or lineages.</p>
            </sec>
            <sec id="sec19">
                <title>Multiple sequence alignment of retro-DNA and parent sites</title>
                <p>We performed multiple sequence alignment for a few selected retro-DNA entries, including their parent sites. For this, we first collected retro-DNA sequences including 100 bp of flanking sequence on each side, as well as the orthologous sequences of the parent sites from the rest of the primate genomes, and performed multiple sequence alignment using the online version of MUltiple Sequence Comparison by Log-Expectation (MUSCLE)
                    <sup>
                        <xref ref-type="bibr" rid="ref59">59</xref>
                    </sup> from the 
                    <ext-link ext-link-type="uri" xlink:href="https://www.ebi.ac.uk/Tools/msa/muscle/">European Bioinformatics Institute website</ext-link>. Phylogenetic analyses in some cases were performed using the Maximum Likelihood method and Tamura-Nei model
                    <sup>
                        <xref ref-type="bibr" rid="ref60">60</xref>
                    </sup> with bootstrapping
                    <sup>
                        <xref ref-type="bibr" rid="ref61">61</xref>
                    </sup> at 500 replications.</p>
            </sec>
            <sec id="sec20">
                <title>Expression analysis of retro-DNAs and their parent copies</title>
                <p>RNA sequencing (RNA-seq) data for the blood and the generic (mixed) samples from chimpanzee, gorilla, crab-eating macaque, rhesus and baboon were retrieved from the Non-Human Primate Reference Transcriptome Resource (NHPRTR)
                    <sup>
                        <xref ref-type="bibr" rid="ref44">44</xref>
                    </sup> for expression analysis of the retro-DNAs and their parent copies. We also collected RNA-seq data for six human transcriptomes (three for blood and three for testis)
                    <sup>
                        <xref ref-type="bibr" rid="ref46">46</xref>
                    </sup> and two green monkey transcriptomes.
                    <sup>
                        <xref ref-type="bibr" rid="ref45">45</xref>
                    </sup>
                    <sup>,</sup>
                    <sup>
                        <xref ref-type="bibr" rid="ref62">62</xref>
                    </sup> The detailed information regarding the NCBI SRA accession numbers and the associated species and tissues is available in Table S6. Tophat2 (version 2.1.1) was used to align the RNA-seq reads to the corresponding reference primate genomes.
                    <sup>
                        <xref ref-type="bibr" rid="ref63">63</xref>
                    </sup> Reads mapped to the retro-DNA/parent copies regions were retrieved in fasta format and aligned back to the reference genome using the NCBI blastn to ensure that each RNA-seq read was assigned to only one genomic location with a perfect match, for use in calculating the fpkm values for each DNA transposon using an in-house Perl script.</p>
            </sec>
            <sec id="sec21">
                <title>Facility and software for computational analysis</title>
                <p>The data analysis and figure plotting were performed using a combination of Linux shell scripting, R, and Microsoft Excel. The computational analysis was mostly performed on Compute Canada high-performance computing facilities running 
                    <ext-link ext-link-type="uri" xlink:href="http://computecanada.ca">CentOS Linux</ext-link>.</p>
            </sec>
        </sec>
    </body>
    <back>
        <sec id="sec24" sec-type="data-availability">
            <title>Data availability</title>
            <sec id="sec25">
                <title>Underlying data</title>
                <p>BioStudies: The identification of retro-DNAs in primate genomes as DNA transposons mobilizing via retrotransposition, 
                    <ext-link ext-link-type="uri" xlink:href="https://identifiers.org/biostudies:S-BSST1030">https://identifiers.org/biostudies:S-BSST1030</ext-link>.
                    <sup>

                        <xref ref-type="bibr" rid="ref64">64</xref>
</sup>
                </p>
            </sec>
            <sec id="sec26">
                <title>Extended data</title>
                <p>Analysis code</p>
                <p>The customized perl and shell scripts used for identification of the reported retro-DNAs are available at 
                    <ext-link ext-link-type="uri" xlink:href="https://github.com/pliang64/retro-DNAs">https://github.com/pliang64/retro-DNAs</ext-link>.</p>
                <p>Archived analysis code at time of publication: 
                    <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.5281/zenodo.7682142">https://doi.org/10.5281/zenodo.7682142</ext-link>.
                    <sup>

                        <xref ref-type="bibr" rid="ref65">65</xref>
</sup>
                </p>
                <p>License: 
                    <ext-link ext-link-type="uri" xlink:href="https://www.gnu.org/licenses/gpl-3.0.en.html">GNU GPL-3.0</ext-link>
                </p>
            </sec>
        </sec>
        <ack>
            <title>Acknowledgments</title>
            <p>This work is in part supported by grants from the Canadian Research Chair program, Canadian Foundation of Innovation, Ontario Ministry of Research and Innovation, Canadian Natural Science and Engineering Research Council (NSERC), and Brock University to PL, and was made possible using Compute Canada (now known as Digital Research Alliance of Canada) high-performance computing facilities. This work has been presented as a preprint at BioRxiv at 
                <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1101/2020.03.19.999144">https://doi.org/10.1101/2020.03.19.999144</ext-link>.</p>
        </ack>
        <ref-list>
            <title>References</title>
            <ref id="ref1">
                <label>1</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Deininger</surname>
                            <given-names>PL</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Mobile elements and mammalian genome evolution.</article-title>
                    <source>

                        <italic toggle="yes">Curr. Opin. Genet. Dev.</italic>
</source>
                    <year>2003</year>;<volume>13</volume>(<issue>6</issue>):<fpage>651</fpage>&#x2013;<lpage>658</lpage>.
                    <pub-id pub-id-type="doi">10.1016/j.gde.2003.10.013</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref2">
                <label>2</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Lander</surname>
                            <given-names>ES</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Initial sequencing and analysis of the human genome.</article-title>
                    <source>

                        <italic toggle="yes">Nature.</italic>
</source>
                    <year>2001</year>;<volume>409</volume>(<issue>6822</issue>):<fpage>860</fpage>&#x2013;<lpage>921</lpage>.
                    <pub-id pub-id-type="pmid">11237011</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref3">
                <label>3</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Tang</surname>
                            <given-names>W</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Liang</surname>
                            <given-names>P</given-names>
                        </name>
</person-group>:
                    <article-title>Comparative Genomics Analysis Reveals High Levels of Differential Retrotransposition among Primates from the Hominidae and the Cercopithecidae Families.</article-title>
                    <source>

                        <italic toggle="yes">Genome Biol. Evol.</italic>
</source>
                    <year>2019</year>;<volume>11</volume>(<issue>11</issue>):<fpage>3309</fpage>&#x2013;<lpage>3325</lpage>.
                    <pub-id pub-id-type="pmid">31651947</pub-id>
                    <pub-id pub-id-type="doi">10.1093/gbe/evz234</pub-id>
                    <pub-id pub-id-type="pmcid">PMC6934888</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref4">
                <label>4</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Cordaux</surname>
                            <given-names>R</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Batzer</surname>
                            <given-names>MA</given-names>
                        </name>
</person-group>:
                    <article-title>The impact of retrotransposons on human genome evolution.</article-title>
                    <source>

                        <italic toggle="yes">Nat. Rev. Genet.</italic>
</source>
                    <year>2009</year>;<volume>10</volume>(<issue>10</issue>):<fpage>691</fpage>&#x2013;<lpage>703</lpage>.
                    <pub-id pub-id-type="pmid">19763152</pub-id>
                    <pub-id pub-id-type="doi">10.1038/nrg2640</pub-id>
                    <pub-id pub-id-type="pmcid">PMC2884099</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref5">
                <label>5</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Symer</surname>
                            <given-names>DE</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Human L1 retrotransposition is associated with genetic instability in vivo.</article-title>
                    <source>

                        <italic toggle="yes">Cell.</italic>
</source>
                    <year>2002</year>;<volume>110</volume>(<issue>3</issue>):<fpage>327</fpage>&#x2013;<lpage>338</lpage>.
                    <pub-id pub-id-type="pmid">12176320</pub-id>
                    <pub-id pub-id-type="doi">10.1016/S0092-8674(02)00839-5</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref6">
                <label>6</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Szak</surname>
                            <given-names>ST</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Identifying related L1 retrotransposons by analyzing 3' transduced sequences.</article-title>
                    <source>

                        <italic toggle="yes">Genome Biol.</italic>
</source>
                    <year>2003</year>;<volume>4</volume>(<issue>5</issue>):<fpage>R30</fpage>.
                    <pub-id pub-id-type="pmid">12734010</pub-id>
                    <pub-id pub-id-type="doi">10.1186/gb-2003-4-5-r30</pub-id>
                    <pub-id pub-id-type="pmcid">PMC156586</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref7">
                <label>7</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Han</surname>
                            <given-names>JS</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Szak</surname>
                            <given-names>ST</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Boeke</surname>
                            <given-names>JD</given-names>
                        </name>
</person-group>:
                    <article-title>Transcriptional disruption by the L1 retrotransposon and implications for mammalian transcriptomes.</article-title>
                    <source>

                        <italic toggle="yes">Nature.</italic>
</source>
                    <year>2004</year>;<volume>429</volume>(<issue>6989</issue>):<fpage>268</fpage>&#x2013;<lpage>274</lpage>.
                    <pub-id pub-id-type="pmid">15152245</pub-id>
                    <pub-id pub-id-type="doi">10.1038/nature02536</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref8">
                <label>8</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Wheelan</surname>
                            <given-names>SJ</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Gene-breaking: a new paradigm for human retrotransposon-mediated gene evolution.</article-title>
                    <source>

                        <italic toggle="yes">Genome Res.</italic>
</source>
                    <year>2005</year>;<volume>15</volume>(<issue>8</issue>):<fpage>1073</fpage>&#x2013;<lpage>1078</lpage>.
                    <pub-id pub-id-type="pmid">16024818</pub-id>
                    <pub-id pub-id-type="doi">10.1101/gr.3688905</pub-id>
                    <pub-id pub-id-type="pmcid">PMC1182219</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref9">
                <label>9</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Mita</surname>
                            <given-names>P</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Boeke</surname>
                            <given-names>JD</given-names>
                        </name>
</person-group>:
                    <article-title>How retrotransposons shape genome regulation.</article-title>
                    <source>

                        <italic toggle="yes">Curr. Opin. Genet. Dev.</italic>
</source>
                    <year>2016</year>;<volume>37</volume>:<fpage>90</fpage>&#x2013;<lpage>100</lpage>.
                    <pub-id pub-id-type="pmid">26855260</pub-id>
                    <pub-id pub-id-type="doi">10.1016/j.gde.2016.01.001</pub-id>
                    <pub-id pub-id-type="pmcid">PMC4914423</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref10">
                <label>10</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Callinan</surname>
                            <given-names>PA</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Alu retrotransposition-mediated deletion.</article-title>
                    <source>

                        <italic toggle="yes">J. Mol. Biol.</italic>
</source>
                    <year>2005</year>;<volume>348</volume>(<issue>4</issue>):<fpage>791</fpage>&#x2013;<lpage>800</lpage>.
                    <pub-id pub-id-type="pmid">15843013</pub-id>
                    <pub-id pub-id-type="doi">10.1016/j.jmb.2005.02.043</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref11">
                <label>11</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Han</surname>
                            <given-names>K</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Genomic rearrangements by LINE-1 insertion-mediated deletion in the human and chimpanzee lineages.</article-title>
                    <source>

                        <italic toggle="yes">Nucleic Acids Res.</italic>
</source>
                    <year>2005</year>;<volume>33</volume>(<issue>13</issue>):<fpage>4040</fpage>&#x2013;<lpage>4052</lpage>.
                    <pub-id pub-id-type="pmid">16034026</pub-id>
                    <pub-id pub-id-type="doi">10.1093/nar/gki718</pub-id>
                    <pub-id pub-id-type="pmcid">PMC1179734</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref12">
                <label>12</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Sen</surname>
                            <given-names>SK</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Human genomic deletions mediated by recombination between Alu elements.</article-title>
                    <source>

                        <italic toggle="yes">Am. J. Hum. Genet.</italic>
</source>
                    <year>2006</year>;<volume>79</volume>(<issue>1</issue>):<fpage>41</fpage>&#x2013;<lpage>53</lpage>.
                    <pub-id pub-id-type="pmid">16773564</pub-id>
                    <pub-id pub-id-type="doi">10.1086/504600</pub-id>
                    <pub-id pub-id-type="pmcid">PMC1474114</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref13">
                <label>13</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Han</surname>
                            <given-names>K</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Alu recombination-mediated structural deletions in the chimpanzee genome.</article-title>
                    <source>

                        <italic toggle="yes">PLoS Genet.</italic>
</source>
                    <year>2007</year>;<volume>3</volume>(<issue>10</issue>):<fpage>1939</fpage>&#x2013;<lpage>1949</lpage>.
                    <pub-id pub-id-type="pmid">17953488</pub-id>
                    <pub-id pub-id-type="doi">10.1371/journal.pgen.0030184</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref14">
                <label>14</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Quinn</surname>
                            <given-names>JP</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Bubb</surname>
                            <given-names>VJ</given-names>
                        </name>
</person-group>:
                    <article-title>SVA retrotransposons as modulators of gene expression.</article-title>
                    <source>

                        <italic toggle="yes">Mob. Genet. Elem.</italic>
</source>
                    <year>2014</year>;<volume>4</volume>:<fpage>e32102</fpage>.
                    <pub-id pub-id-type="pmid">25077041</pub-id>
                    <pub-id pub-id-type="doi">10.4161/mge.32102</pub-id>
                    <pub-id pub-id-type="pmcid">PMC4114917</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref15">
                <label>15</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Konkel</surname>
                            <given-names>MK</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Batzer</surname>
                            <given-names>MA</given-names>
                        </name>
</person-group>:
                    <article-title>A mobile threat to genome stability: The impact of non-LTR retrotransposons upon the human genome.</article-title>
                    <source>

                        <italic toggle="yes">Semin. Cancer Biol.</italic>
</source>
                    <year>2010</year>;<volume>20</volume>:<fpage>211</fpage>&#x2013;<lpage>221</lpage>.
                    <pub-id pub-id-type="pmid">20307669</pub-id>
                    <pub-id pub-id-type="doi">10.1016/j.semcancer.2010.03.001</pub-id>
                    <pub-id pub-id-type="pmcid">PMC2925057</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref16">
                <label>16</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Chuong</surname>
                            <given-names>EB</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Elde</surname>
                            <given-names>NC</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Feschotte</surname>
                            <given-names>C</given-names>
                        </name>
</person-group>:
                    <article-title>Regulatory evolution of innate immunity through co-option of endogenous retroviruses.</article-title>
                    <source>

                        <italic toggle="yes">Science.</italic>
</source>
                    <year>2016</year>;<volume>351</volume>(<issue>6277</issue>):<fpage>1083</fpage>&#x2013;<lpage>1087</lpage>.
                    <pub-id pub-id-type="pmid">26941318</pub-id>
                    <pub-id pub-id-type="doi">10.1126/science.aad5497</pub-id>
                    <pub-id pub-id-type="pmcid">PMC4887275</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref17">
                <label>17</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Feschotte</surname>
                            <given-names>C</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Pritham</surname>
                            <given-names>EJ</given-names>
                        </name>
</person-group>:
                    <article-title>DNA transposons and the evolution of eukaryotic genomes.</article-title>
                    <source>

                        <italic toggle="yes">Annu. Rev. Genet.</italic>
</source>
                    <year>2007</year>;<volume>41</volume>:<fpage>331</fpage>&#x2013;<lpage>368</lpage>.
                    <pub-id pub-id-type="pmid">18076328</pub-id>
                    <pub-id pub-id-type="doi">10.1146/annurev.genet.40.110405.090448</pub-id>
                    <pub-id pub-id-type="pmcid">PMC2167627</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref18">
                <label>18</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Smit</surname>
                            <given-names>AF</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Riggs</surname>
                            <given-names>AD</given-names>
                        </name>
</person-group>:
                    <article-title>Tiggers and DNA transposon fossils in the human genome.</article-title>
                    <source>

                        <italic toggle="yes">Proc. Natl. Acad. Sci. U. S. A.</italic>
</source>
                    <year>1996</year>;<volume>93</volume>(<issue>4</issue>):<fpage>1443</fpage>&#x2013;<lpage>1448</lpage>.
                    <pub-id pub-id-type="pmid">8643651</pub-id>
                    <pub-id pub-id-type="doi">10.1073/pnas.93.4.1443</pub-id>
                    <pub-id pub-id-type="pmcid">PMC39958</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref19">
                <label>19</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Pace</surname>
                            <given-names>JK</given-names>
                            <suffix>II</suffix>
                        </name>

                        <name name-style="western">
                            <surname>Feschotte</surname>
                            <given-names>C</given-names>
                        </name>
</person-group>:
                    <article-title>The evolutionary history of human DNA transposons: Evidence for intense activity in the primate lineage.</article-title>
                    <source>

                        <italic toggle="yes">Genome Res.</italic>
</source>
                    <year>2007</year>;<volume>17</volume>(<issue>4</issue>):<fpage>422</fpage>&#x2013;<lpage>432</lpage>.
                    <pub-id pub-id-type="doi">10.1101/gr.5826307</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref20">
                <label>20</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Kazazian</surname>
                            <given-names>HH</given-names>
                            <suffix>Jr</suffix>
                        </name>

                        <name name-style="western">
                            <surname>Goodier</surname>
                            <given-names>JL</given-names>
                        </name>
</person-group>:
                    <article-title>LINE drive: retrotransposition and genome instability.</article-title>
                    <source>

                        <italic toggle="yes">Cell.</italic>
</source>
                    <year>2002</year>;<volume>110</volume>(<issue>3</issue>):<fpage>277</fpage>&#x2013;<lpage>280</lpage>.
                    <pub-id pub-id-type="doi">10.1016/S0092-8674(02)00868-1</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref21">
                <label>21</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Mayer</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Meese</surname>
                            <given-names>E</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Mueller-Lantzsch</surname>
                            <given-names>N</given-names>
                        </name>
</person-group>:
                    <article-title>Human endogenous retrovirus K homologous sequences and their coding capacity in Old World primates.</article-title>
                    <source>

                        <italic toggle="yes">J. Virol.</italic>
</source>
                    <year>1998</year>;<volume>72</volume>(<issue>3</issue>):<fpage>1870</fpage>&#x2013;<lpage>1875</lpage>.
                    <pub-id pub-id-type="pmid">9499038</pub-id>
                    <pub-id pub-id-type="doi">10.1128/JVI.72.3.1870-1875.1998</pub-id>
                    <pub-id pub-id-type="pmcid">PMC109477</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref22">
                <label>22</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Costas</surname>
                            <given-names>J</given-names>
                        </name>
</person-group>:
                    <article-title>Evolutionary dynamics of the human endogenous retrovirus family HERV-K inferred from full-length proviral genomes.</article-title>
                    <source>

                        <italic toggle="yes">J. Mol. Evol.</italic>
</source>
                    <year>2001</year>;<volume>53</volume>(<issue>3</issue>):<fpage>237</fpage>&#x2013;<lpage>243</lpage>.
                    <pub-id pub-id-type="pmid">11523010</pub-id>
                    <pub-id pub-id-type="doi">10.1007/s002390010213</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref23">
                <label>23</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Hughes</surname>
                            <given-names>JF</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Coffin</surname>
                            <given-names>JM</given-names>
                        </name>
</person-group>:
                    <article-title>Human endogenous retrovirus K solo-LTR formation and insertional polymorphisms: implications for human and viral evolution.</article-title>
                    <source>

                        <italic toggle="yes">Proc. Natl. Acad. Sci. U. S. A.</italic>
</source>
                    <year>2004</year>;<volume>101</volume>(<issue>6</issue>):<fpage>1668</fpage>&#x2013;<lpage>1672</lpage>.
                    <pub-id pub-id-type="pmid">14757818</pub-id>
                    <pub-id pub-id-type="doi">10.1073/pnas.0307885100</pub-id>
                    <pub-id pub-id-type="pmcid">PMC341815</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref24">
                <label>24</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Jern</surname>
                            <given-names>P</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Sperber</surname>
                            <given-names>GO</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Blomberg</surname>
                            <given-names>J</given-names>
                        </name>
</person-group>:
                    <article-title>Definition and variation of human endogenous retrovirus H.</article-title>
                    <source>

                        <italic toggle="yes">Virology.</italic>
</source>
                    <year>2004</year>;<volume>327</volume>(<issue>1</issue>):<fpage>93</fpage>&#x2013;<lpage>110</lpage>.
                    <pub-id pub-id-type="pmid">15327901</pub-id>
                    <pub-id pub-id-type="doi">10.1016/j.virol.2004.06.023</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref25">
                <label>25</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Belshaw</surname>
                            <given-names>R</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Genomewide screening reveals high levels of insertional polymorphism in the human endogenous retrovirus family HERV-K (HML2): implications for present-day activity.</article-title>
                    <source>

                        <italic toggle="yes">J. Virol.</italic>
</source>
                    <year>2005</year>;<volume>79</volume>(<issue>19</issue>):<fpage>12507</fpage>&#x2013;<lpage>12514</lpage>.
                    <pub-id pub-id-type="pmid">16160178</pub-id>
                    <pub-id pub-id-type="doi">10.1128/JVI.79.19.12507-12514.2005</pub-id>
                    <pub-id pub-id-type="pmcid">PMC1211540</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref26">
                <label>26</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Shin</surname>
                            <given-names>W</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Human-specific HERV-K insertion causes genomic variations in the human genome.</article-title>
                    <source>

                        <italic toggle="yes">PLoS One.</italic>
</source>
                    <year>2013</year>;<volume>8</volume>(<issue>4</issue>):<fpage>e60605</fpage>.
                    <pub-id pub-id-type="pmid">23593260</pub-id>
                    <pub-id pub-id-type="doi">10.1371/journal.pone.0060605</pub-id>
                    <pub-id pub-id-type="pmcid">PMC3625200</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref27">
                <label>27</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Ding</surname>
                            <given-names>W</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>L1 elements, processed pseudogenes and retrogenes in mammalian genomes.</article-title>
                    <source>

                        <italic toggle="yes">IUBMB Life.</italic>
</source>
                    <year>2006</year>;<volume>58</volume>(<issue>12</issue>):<fpage>677</fpage>&#x2013;<lpage>685</lpage>.
                    <pub-id pub-id-type="pmid">17424906</pub-id>
                    <pub-id pub-id-type="doi">10.1080/15216540601034856</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref28">
                <label>28</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Raiz</surname>
                            <given-names>J</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>The non-autonomous retrotransposon SVA is trans-mobilized by the human LINE-1 protein machinery.</article-title>
                    <source>

                        <italic toggle="yes">Nucleic Acids Res.</italic>
</source>
                    <year>2012</year>;<volume>40</volume>(<issue>4</issue>):<fpage>1666</fpage>&#x2013;<lpage>1683</lpage>.
                    <pub-id pub-id-type="pmid">22053090</pub-id>
                    <pub-id pub-id-type="doi">10.1093/nar/gkr863</pub-id>
                    <pub-id pub-id-type="pmcid">PMC3287187</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref29">
                <label>29</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Kazazian</surname>
                            <given-names>HH</given-names>
                            <suffix>Jr</suffix>
                        </name>

                        <name name-style="western">
                            <surname>Moran</surname>
                            <given-names>JV</given-names>
                        </name>
</person-group>:
                    <article-title>The impact of L1 retrotransposons on the human genome.</article-title>
                    <source>

                        <italic toggle="yes">Nat. Genet.</italic>
</source>
                    <year>1998</year>;<volume>19</volume>(<issue>1</issue>):<fpage>19</fpage>&#x2013;<lpage>24</lpage>.
                    <pub-id pub-id-type="doi">10.1038/ng0598-19</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref30">
                <label>30</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Kazazian</surname>
                            <given-names>HH</given-names>
                            <suffix>Jr</suffix>
                        </name>
</person-group>:
                    <article-title>Genetics. L1 retrotransposons shape the mammalian genome.</article-title>
                    <source>

                        <italic toggle="yes">Science.</italic>
</source>
                    <year>2000</year>;<volume>289</volume>(<issue>5482</issue>):<fpage>1152</fpage>&#x2013;<lpage>1153</lpage>.
                    <pub-id pub-id-type="doi">10.1126/science.289.5482.1152</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref31">
                <label>31</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Ostertag</surname>
                            <given-names>EM</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Kazazian</surname>
                            <given-names>HH</given-names>
                            <suffix>Jr</suffix>
                        </name>
</person-group>:
                    <article-title>Biology of mammalian L1 retrotransposons.</article-title>
                    <source>

                        <italic toggle="yes">Annu. Rev. Genet.</italic>
</source>
                    <year>2001</year>;<volume>35</volume>:<fpage>501</fpage>&#x2013;<lpage>538</lpage>.
                    <pub-id pub-id-type="doi">10.1146/annurev.genet.35.102401.091032</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref32">
                <label>32</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Goodier</surname>
                            <given-names>JL</given-names>
                        </name>
</person-group>:
                    <article-title>Restricting retrotransposons: a review.</article-title>
                    <source>

                        <italic toggle="yes">Mob. DNA.</italic>
</source>
                    <year>2016</year>;<volume>7</volume>:<fpage>16</fpage>.
                    <pub-id pub-id-type="pmid">27525044</pub-id>
                    <pub-id pub-id-type="doi">10.1186/s13100-016-0070-z</pub-id>
                    <pub-id pub-id-type="pmcid">PMC4982230</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref33">
                <label>33</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Cost</surname>
                            <given-names>GJ</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Boeke</surname>
                            <given-names>JD</given-names>
                        </name>
</person-group>:
                    <article-title>Targeting of human retrotransposon integration is directed by the specificity of the L1 endonuclease for regions of unusual DNA structure.</article-title>
                    <source>

                        <italic toggle="yes">Biochemistry.</italic>
</source>
                    <year>1998</year>;<volume>37</volume>(<issue>51</issue>):<fpage>18081</fpage>&#x2013;<lpage>18093</lpage>.
                    <pub-id pub-id-type="pmid">9922177</pub-id>
                    <pub-id pub-id-type="doi">10.1021/bi981858s</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref34">
                <label>34</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Jurka</surname>
                            <given-names>J</given-names>
                        </name>
</person-group>:
                    <article-title>Sequence patterns indicate an enzymatic involvement in integration of mammalian retroposons.</article-title>
                    <source>

                        <italic toggle="yes">Proc. Natl. Acad. Sci. U. S. A.</italic>
</source>
                    <year>1997</year>;<volume>94</volume>(<issue>5</issue>):<fpage>1872</fpage>&#x2013;<lpage>1877</lpage>.
                    <pub-id pub-id-type="pmid">9050872</pub-id>
                    <pub-id pub-id-type="doi">10.1073/pnas.94.5.1872</pub-id>
                    <pub-id pub-id-type="pmcid">PMC20010</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref35">
                <label>35</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Xing</surname>
                            <given-names>J</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Emergence of primate genes by retrotransposon-mediated sequence transduction.</article-title>
                    <source>

                        <italic toggle="yes">Proc. Natl. Acad. Sci. U. S. A.</italic>
</source>
                    <year>2006</year>;<volume>103</volume>(<issue>47</issue>):<fpage>17608</fpage>&#x2013;<lpage>17613</lpage>.
                    <pub-id pub-id-type="pmid">17101974</pub-id>
                    <pub-id pub-id-type="doi">10.1073/pnas.0603224103</pub-id>
                    <pub-id pub-id-type="pmcid">PMC1693794</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref36">
                <label>36</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Tang</surname>
                            <given-names>W</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Mobile elements contribute to the uniqueness of human genome with 15,000 human-specific insertions and 14 Mbp sequence increase.</article-title>
                    <source>

                        <italic toggle="yes">DNA Res.</italic>
</source>
                    <year>2018</year>;<volume>25</volume>(<issue>5</issue>):<fpage>521</fpage>&#x2013;<lpage>533</lpage>.
                    <pub-id pub-id-type="pmid">30052927</pub-id>
                    <pub-id pub-id-type="doi">10.1093/dnares/dsy022</pub-id>
                    <pub-id pub-id-type="pmcid">PMC6191304</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref37">
                <label>37</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Kapitonov</surname>
                            <given-names>VV</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Jurka</surname>
                            <given-names>J</given-names>
                        </name>
</person-group>:
                    <article-title>Rolling-circle transposons in eukaryotes.</article-title>
                    <source>

                        <italic toggle="yes">Proc. Natl. Acad. Sci. U. S. A.</italic>
</source>
                    <year>2001</year>;<volume>98</volume>(<issue>15</issue>):<fpage>8714</fpage>&#x2013;<lpage>8719</lpage>.
                    <pub-id pub-id-type="pmid">11447285</pub-id>
                    <pub-id pub-id-type="doi">10.1073/pnas.151269298</pub-id>
                    <pub-id pub-id-type="pmcid">PMC37501</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref38">
                <label>38</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Pritham</surname>
                            <given-names>EJ</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Putliwala</surname>
                            <given-names>T</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Feschotte</surname>
                            <given-names>C</given-names>
                        </name>
</person-group>:
                    <article-title>Mavericks, a novel class of giant transposable elements widespread in eukaryotes and related to DNA viruses.</article-title>
                    <source>

                        <italic toggle="yes">Gene.</italic>
</source>
                    <year>2007</year>;<volume>390</volume>(<issue>1-2</issue>):<fpage>3</fpage>&#x2013;<lpage>17</lpage>.
                    <pub-id pub-id-type="pmid">17034960</pub-id>
                    <pub-id pub-id-type="doi">10.1016/j.gene.2006.08.008</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref39">
                <label>39</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Zhang</surname>
                            <given-names>Q</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Arbuckle</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Wessler</surname>
                            <given-names>SR</given-names>
                        </name>
</person-group>:
                    <article-title>Recent, extensive, and preferential insertion of members of the miniature inverted-repeat transposable element family Heartbreaker into genic regions of maize.</article-title>
                    <source>

                        <italic toggle="yes">Proc. Natl. Acad. Sci. U. S. A.</italic>
</source>
                    <year>2000</year>;<volume>97</volume>(<issue>3</issue>):<fpage>1160</fpage>&#x2013;<lpage>1165</lpage>.
                    <pub-id pub-id-type="pmid">10655501</pub-id>
                    <pub-id pub-id-type="doi">10.1073/pnas.97.3.1160</pub-id>
                    <pub-id pub-id-type="pmcid">PMC15555</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref40">
                <label>40</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Feschotte</surname>
                            <given-names>C</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Swamy</surname>
                            <given-names>L</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Wessler</surname>
                            <given-names>SR</given-names>
                        </name>
</person-group>:
                    <article-title>Genome-wide analysis of mariner-like transposable elements in rice reveals complex relationships with stowaway miniature inverted repeat transposable elements (MITEs).</article-title>
                    <source>

                        <italic toggle="yes">Genetics.</italic>
</source>
                    <year>2003</year>;<volume>163</volume>(<issue>2</issue>):<fpage>747</fpage>&#x2013;<lpage>758</lpage>.
                    <pub-id pub-id-type="pmid">12618411</pub-id>
                    <pub-id pub-id-type="doi">10.1093/genetics/163.2.747</pub-id>
                    <pub-id pub-id-type="pmcid">PMC1462451</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref41">
                <label>41</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Wang</surname>
                            <given-names>J</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Whole genome computational comparative genomics: A fruitful approach for ascertaining Alu insertion polymorphisms.</article-title>
                    <source>

                        <italic toggle="yes">Gene.</italic>
</source>
                    <year>2006</year>;<volume>365</volume>:<fpage>11</fpage>&#x2013;<lpage>20</lpage>.
                    <pub-id pub-id-type="pmid">16376498</pub-id>
                    <pub-id pub-id-type="doi">10.1016/j.gene.2005.09.031</pub-id>
                    <pub-id pub-id-type="pmcid">PMC1847407</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref42">
                <label>42</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Zerbino</surname>
                            <given-names>DR</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Ensembl 2018.</article-title>
                    <source>

                        <italic toggle="yes">Nucleic Acids Res.</italic>
</source>
                    <year>2018</year>;<volume>46</volume>(<issue>D1</issue>):<fpage>D754</fpage>&#x2013;<lpage>D761</lpage>.
                    <pub-id pub-id-type="pmid">29155950</pub-id>
                    <pub-id pub-id-type="doi">10.1093/nar/gkx1098</pub-id>
                    <pub-id pub-id-type="pmcid">PMC5753206</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref43">
                <label>43</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Hedges</surname>
                            <given-names>SB</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Dudley</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Kumar</surname>
                            <given-names>S</given-names>
                        </name>
</person-group>:
                    <article-title>TimeTree: a public knowledge-base of divergence times among organisms.</article-title>
                    <source>

                        <italic toggle="yes">Bioinformatics.</italic>
</source>
                    <year>2006</year>;<volume>22</volume>(<issue>23</issue>):<fpage>2971</fpage>&#x2013;<lpage>2972</lpage>.
                    <pub-id pub-id-type="pmid">17021158</pub-id>
                    <pub-id pub-id-type="doi">10.1093/bioinformatics/btl505</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref44">
                <label>44</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Pipes</surname>
                            <given-names>L</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>The non-human primate reference transcriptome resource (NHPRTR) for comparative functional genomics.</article-title>
                    <source>

                        <italic toggle="yes">Nucleic Acids Res.</italic>
</source>
                    <year>2013</year>;<volume>41</volume>(<issue>Database issue</issue>):<fpage>D906</fpage>&#x2013;<lpage>D914</lpage>.
                    <pub-id pub-id-type="pmid">23203872</pub-id>
                    <pub-id pub-id-type="doi">10.1093/nar/gks1268</pub-id>
                    <pub-id pub-id-type="pmcid">PMC3531109</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref45">
                <label>45</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Jasinska</surname>
                            <given-names>AJ</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Genetic variation and gene expression across multiple tissues and developmental stages in a nonhuman primate.</article-title>
                    <source>

                        <italic toggle="yes">Nat. Genet.</italic>
</source>
                    <year>2017</year>;<volume>49</volume>(<issue>12</issue>):<fpage>1714</fpage>&#x2013;<lpage>1721</lpage>.
                    <pub-id pub-id-type="pmid">29083405</pub-id>
                    <pub-id pub-id-type="doi">10.1038/ng.3959</pub-id>
                    <pub-id pub-id-type="pmcid">PMC5714271</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref46">
                <label>46</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Shin</surname>
                            <given-names>H</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Variation in RNA-Seq transcriptome profiles of peripheral whole blood from healthy individuals with and without globin depletion.</article-title>
                    <source>

                        <italic toggle="yes">PLoS One.</italic>
</source>
                    <year>2014</year>;<volume>9</volume>(<issue>3</issue>):<fpage>e91041</fpage>.
                    <pub-id pub-id-type="pmid">24608128</pub-id>
                    <pub-id pub-id-type="doi">10.1371/journal.pone.0091041</pub-id>
                    <pub-id pub-id-type="pmcid">PMC3946641</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref47">
                <label>47</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Jordan</surname>
                            <given-names>VE</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>A computational reconstruction of Papio phylogeny using Alu insertion polymorphisms.</article-title>
                    <source>

                        <italic toggle="yes">Mob. DNA.</italic>
</source>
                    <year>2018</year>;<volume>9</volume>:<fpage>13</fpage>.
                    <pub-id pub-id-type="pmid">29632618</pub-id>
                    <pub-id pub-id-type="doi">10.1186/s13100-018-0118-3</pub-id>
                    <pub-id pub-id-type="pmcid">PMC5885306</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref48">
                <label>48</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Dewannieux</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Esnault</surname>
                            <given-names>C</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Heidmann</surname>
                            <given-names>T</given-names>
                        </name>
</person-group>:
                    <article-title>LINE-mediated retrotransposition of marked Alu sequences.</article-title>
                    <source>

                        <italic toggle="yes">Nat. Genet.</italic>
</source>
                    <year>2003</year>;<volume>35</volume>(<issue>1</issue>):<fpage>41</fpage>&#x2013;<lpage>48</lpage>.
                    <pub-id pub-id-type="pmid">12897783</pub-id>
                    <pub-id pub-id-type="doi">10.1038/ng1223</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref49">
                <label>49</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Wallace</surname>
                            <given-names>N</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>LINE-1 ORF1 protein enhances Alu SINE retrotransposition.</article-title>
                    <source>

                        <italic toggle="yes">Gene.</italic>
</source>
                    <year>2008</year>;<volume>419</volume>(<issue>1-2</issue>):<fpage>1</fpage>&#x2013;<lpage>6</lpage>.
                    <pub-id pub-id-type="pmid">18534786</pub-id>
                    <pub-id pub-id-type="doi">10.1016/j.gene.2008.04.007</pub-id>
                    <pub-id pub-id-type="pmcid">PMC2491492</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref50">
                <label>50</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Costas</surname>
                            <given-names>J</given-names>
                        </name>
</person-group>:
                    <article-title>Characterization of the intragenomic spread of the human endogenous retrovirus family HERV-W.</article-title>
                    <source>

                        <italic toggle="yes">Mol. Biol. Evol.</italic>
</source>
                    <year>2002</year>;<volume>19</volume>(<issue>4</issue>):<fpage>526</fpage>&#x2013;<lpage>533</lpage>.
                    <pub-id pub-id-type="pmid">11919294</pub-id>
                    <pub-id pub-id-type="doi">10.1093/oxfordjournals.molbev.a004108</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref51">
                <label>51</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Grandi</surname>
                            <given-names>N</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Contribution of type W human endogenous retroviruses to the human genome: characterization of HERV-W proviral insertions and processed pseudogenes.</article-title>
                    <source>

                        <italic toggle="yes">Retrovirology.</italic>
</source>
                    <year>2016</year>;<volume>13</volume>(<issue>1</issue>):<fpage>67</fpage>.
                    <pub-id pub-id-type="pmid">27613107</pub-id>
                    <pub-id pub-id-type="doi">10.1186/s12977-016-0301-x</pub-id>
                    <pub-id pub-id-type="pmcid">PMC5016936</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref52">
                <label>52</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Bennett</surname>
                            <given-names>EA</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Active Alu retrotransposons in the human genome.</article-title>
                    <source>

                        <italic toggle="yes">Genome Res.</italic>
</source>
                    <year>2008</year>;<volume>18</volume>(<issue>12</issue>):<fpage>1875</fpage>&#x2013;<lpage>1883</lpage>.
                    <pub-id pub-id-type="pmid">18836035</pub-id>
                    <pub-id pub-id-type="doi">10.1101/gr.081737.108</pub-id>
                    <pub-id pub-id-type="pmcid">PMC2593586</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref53">
                <label>53</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Tutar</surname>
                            <given-names>Y</given-names>
                        </name>
</person-group>:
                    <article-title>Pseudogenes.</article-title>
                    <source>

                        <italic toggle="yes">Comp Funct Genomics.</italic>
</source>
                    <year>2012</year>;<volume>2012</volume>:<fpage>424526</fpage>.</mixed-citation>
            </ref>
            <ref id="ref54">
                <label>54</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Harris</surname>
                            <given-names>RS</given-names>
                        </name>
</person-group>:
                    <source>

                        <italic toggle="yes">Improved pairwise alignment of genomic DNA.</italic>
</source>
                    <publisher-name>Pennsylvania State University</publisher-name>:<year>2007</year>;<fpage>84</fpage>.</mixed-citation>
            </ref>
            <ref id="ref55">
                <label>55</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Kent</surname>
                            <given-names>WJ</given-names>
                        </name>
</person-group>:
                    <article-title>BLAT--the BLAST-like alignment tool.</article-title>
                    <source>

                        <italic toggle="yes">Genome Res.</italic>
</source>
                    <year>2002</year>;<volume>12</volume>(<issue>4</issue>):<fpage>656</fpage>&#x2013;<lpage>664</lpage>.
                    <pub-id pub-id-type="pmid">11932250</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref56">
                <label>56</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Hinrichs</surname>
                            <given-names>AS</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>The UCSC Genome Browser Database: update 2006.</article-title>
                    <source>

                        <italic toggle="yes">Nucleic Acids Res.</italic>
</source>
                    <year>2006</year>;<volume>34</volume>(<issue>Database issue</issue>):<fpage>D590</fpage>&#x2013;<lpage>D598</lpage>.
                    <pub-id pub-id-type="pmid">16381938</pub-id>
                    <pub-id pub-id-type="doi">10.1093/nar/gkj144</pub-id>
                    <pub-id pub-id-type="pmcid">PMC1347506</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref57">
                <label>57</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Hu</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Zheng</surname>
                            <given-names>Y</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Shang</surname>
                            <given-names>X</given-names>
                        </name>
</person-group>:
                    <article-title>MiteFinderII: a novel tool to identify miniature inverted-repeat transposable elements hidden in eukaryotic genomes.</article-title>
                    <source>

                        <italic toggle="yes">BMC Med. Genet.</italic>
</source>
                    <year>2018</year>;<volume>11</volume>(<issue>Suppl 5</issue>):<fpage>101</fpage>.
                    <pub-id pub-id-type="pmid">30453969</pub-id>
                    <pub-id pub-id-type="doi">10.1186/s12920-018-0418-y</pub-id>
                    <pub-id pub-id-type="pmcid">PMC6245586</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref58">
                <label>58</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Page</surname>
                            <given-names>RD</given-names>
                        </name>
</person-group>:
                    <article-title>TreeView: an application to display phylogenetic trees on personal computers.</article-title>
                    <source>

                        <italic toggle="yes">Comput. Appl. Biosci.</italic>
</source>
                    <year>1996</year>;<volume>12</volume>(<issue>4</issue>):<fpage>357</fpage>&#x2013;<lpage>358</lpage>.
                    <pub-id pub-id-type="pmid">8902363</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref59">
                <label>59</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Madeira</surname>
                            <given-names>F</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>The EMBL-EBI search and sequence analysis tools APIs in 2019.</article-title>
                    <source>

                        <italic toggle="yes">Nucleic Acids Res.</italic>
</source>
                    <year>2019</year>;<volume>47</volume>(<issue>W1</issue>):<fpage>W636</fpage>&#x2013;<lpage>W641</lpage>.
                    <pub-id pub-id-type="pmid">30976793</pub-id>
                    <pub-id pub-id-type="doi">10.1093/nar/gkz268</pub-id>
                    <pub-id pub-id-type="pmcid">PMC6602479</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref60">
                <label>60</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Tamura</surname>
                            <given-names>K</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Nei</surname>
                            <given-names>M</given-names>
                        </name>
</person-group>:
                    <article-title>Estimation of the number of nucleotide substitutions in the control region of mitochondrial DNA in humans and chimpanzees.</article-title>
                    <source>

                        <italic toggle="yes">Mol. Biol. Evol.</italic>
</source>
                    <year>1993</year>;<volume>10</volume>(<issue>3</issue>):<fpage>512</fpage>&#x2013;<lpage>526</lpage>.
                    <pub-id pub-id-type="pmid">8336541</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref61">
                <label>61</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Felsenstein</surname>
                            <given-names>J</given-names>
                        </name>
</person-group>:
                    <article-title>Confidence limits on phylogenies: An approach using the bootstrap.</article-title>
                    <source>

                        <italic toggle="yes">Evolution.</italic>
</source>
                    <year>1985</year>;<volume>39</volume>(<issue>4</issue>):<fpage>783</fpage>&#x2013;<lpage>791</lpage>.
                    <pub-id pub-id-type="pmid">28561359</pub-id>
                    <pub-id pub-id-type="doi">10.2307/2408678</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref62">
                <label>62</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Jasinska</surname>
                            <given-names>AJ</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Systems biology of the vervet monkey.</article-title>
                    <source>

                        <italic toggle="yes">ILAR J.</italic>
</source>
                    <year>2013</year>;<volume>54</volume>(<issue>2</issue>):<fpage>122</fpage>&#x2013;<lpage>143</lpage>.
                    <pub-id pub-id-type="pmid">24174437</pub-id>
                    <pub-id pub-id-type="doi">10.1093/ilar/ilt049</pub-id>
                    <pub-id pub-id-type="pmcid">PMC3814400</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref63">
                <label>63</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Kim</surname>
                            <given-names>D</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>TopHat2: accurate alignment of transcriptomes in the presence of insertions, deletions and gene fusions.</article-title>
                    <source>

                        <italic toggle="yes">Genome Biol.</italic>
</source>
                    <year>2013</year>;<volume>14</volume>(<issue>4</issue>):<fpage>R36</fpage>.
                    <pub-id pub-id-type="pmid">23618408</pub-id>
                    <pub-id pub-id-type="doi">10.1186/gb-2013-14-4-r36</pub-id>
                    <pub-id pub-id-type="pmcid">PMC4053844</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref64">
                <label>64</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Liang</surname>
                            <given-names>P</given-names>
                        </name>
</person-group>:
                    <article-title>The identification of retro-DNAs in primate genomes as DNA transposons mobilizing via retrotransposition.</article-title>
                    <source>

                        <italic toggle="yes">BioStudies.</italic>
</source>
                    <year>2023</year>. S-BSST1030.
                    <ext-link ext-link-type="uri" xlink:href="https://www.ebi.ac.uk/biostudies/studies/S-BSST1030">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref65">
                <label>65</label>
                <mixed-citation publication-type="other">
                    <collab>Liang Lab at Brock University</collab>:
                    <article-title>pliang64/retro-DNAs: Perl and shell scripts for retro-DNAs (retro-DNA).</article-title>
                    <source>

                        <italic toggle="yes">Zenodo.</italic>
</source>
                    <year>2023</year>.
                    <pub-id pub-id-type="doi">10.5281/zenodo.7682142</pub-id>
                </mixed-citation>
            </ref>
        </ref-list>
    </back>
    <sub-article article-type="reviewer-report" id="report251957">
        <front-stub>
            <article-id pub-id-type="doi">10.5256/f1000research.142770.r251957</article-id>
            <title-group>
                <article-title>Reviewer response for version 1</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author">
                    <name>
                        <surname>Marsano</surname>
                        <given-names>Rene Massimiliano</given-names>
                    </name>
                    <xref ref-type="aff" rid="r251957a1">1</xref>
                    <role>Referee</role>
                </contrib>
                <aff id="r251957a1">
                    <label>1</label>Universit&#x00e0; degli Studi di Bari "Aldo Moro", Bari, Italy</aff>
            </contrib-group>
            <author-notes>
                <fn fn-type="conflict">
                    <p>
                        <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>26</day>
                <month>3</month>
                <year>2024</year>
            </pub-date>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2024 Marsano RM</copyright-statement>
                <copyright-year>2024</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access peer review report distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <related-article ext-link-type="doi" id="relatedArticleReport251957" related-article-type="peer-reviewed-article" xlink:href="10.12688/f1000research.130043.1"/>
            <custom-meta-group>
                <custom-meta>
                    <meta-name>recommendation</meta-name>
                    <meta-value>approve-with-reservations</meta-value>
                </custom-meta>
            </custom-meta-group>
        </front-stub>
        <body>
            <p>In this manuscript, the Authors conduct a comparative analysis of 10 primate genomes, leading to the identification of a new type of mobile element, which they call &#x201c;retro-DNAs&#x201d;, displaying combined features of both Class I (non-LTR retrotransposons) and Class II (DNA transposons) elements.</p>
            <p> The Authors initially annotated DNA transposons (remnants) in the analyzed genomes and subsequently characterized a subset of sequences featuring an unusually long TSD and a poly-A tail. Furthermore, the Authors inferred the lineage specificity of retro-DNAs and determined the expression level of the retro-DNA sequences identified.</p>
            <p> </p>
            <p> The manuscript is well-written and easy to read in all its parts.</p>
            <p> </p>
            <p> </p>
            <p> Find below a list of major issues.</p>
            <p> </p>
            <p> - While retrotransposition can be considered as a way to spread transposable element fragments around the genome and the information provided in this manuscript is (to my knowledge) new, I would be cautious in defining retro-DNAs as "a new type of non-autonomous non-LTR retrotransposons". Retroposition of expressed sequences is a common process and, indeed, we do not define processed pseudogenes this way. I would therefore recommend modifying this conclusion to avoid overstatement and misinterpretation of the results.</p>
            <p> </p>
            <p> - The expression analysis shows that some retro-DNA sequences are expressed in the testis. Is this finding compatible with what is usually seen for retrotranscribed pseudogenes? Testis activation of pseudogenes is well-documented, suggesting potential functional roles. Many pseudogenes are activated in the testis [1],[2]. In the case of TEs (and especially non-functional TEs), testis expression may evolve new regulatory functions, as observed in other transposition systems which are subject to piRNA regulation.</p>
            <p> </p>
            <p> - I cannot see the supplementary files associated with the manuscript.</p>
            <p>Is the work clearly and accurately presented and does it cite the current literature?</p>
            <p>Yes</p>
            <p>If applicable, is the statistical analysis and its interpretation appropriate?</p>
            <p>Yes</p>
            <p>Are all the source data underlying the results available to ensure full reproducibility?</p>
            <p>Yes</p>
            <p>Is the study design appropriate and is the work technically sound?</p>
            <p>Yes</p>
            <p>Are the conclusions drawn adequately supported by the results?</p>
            <p>Partly</p>
            <p>Are sufficient details of methods and analysis provided to allow replication by others?</p>
            <p>Yes</p>
            <p>Reviewer Expertise:</p>
            <p>transposable elements, genetics</p>
            <p>I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard, however I have significant reservations, as outlined above.</p>
        </body>
        <back>
            <ref-list>
                <title>References</title>
                <ref id="rep-ref-251957-1">
                    <label>1</label>
                    <mixed-citation publication-type="journal">
                        <person-group person-group-type="author"/>:
                        <article-title>Origins, evolution, and phenotypic impact of new genes.</article-title>
                        <source>
                            <italic>Genome Res</italic>
                        </source>.<year>2010</year>;<volume>20</volume>(<issue>10</issue>) :
                        <fpage>1313</fpage>-<lpage>26</lpage>
                        <pub-id pub-id-type="pmid">20651121</pub-id>
                        <pub-id pub-id-type="doi">10.1101/gr.101386.109</pub-id>
                    </mixed-citation>
                </ref>
                <ref id="rep-ref-251957-2">
                    <label>2</label>
                    <mixed-citation publication-type="journal">
                        <person-group person-group-type="author"/>:
                        <article-title>Structural and Comparative Analyses of Insects Suggest the Presence of an Ultra-Conserved Regulatory Element of the Genes Encoding Vacuolar-Type ATPase Subunits and Assembly Factors.</article-title>
                        <source>
                            <italic>Biology (Basel)</italic>
                        </source>.<year>2023</year>;<volume>12</volume>(<issue>8</issue>) :
                        <elocation-id>10.3390/biology12081127</elocation-id>
                        <pub-id pub-id-type="pmid">37627011</pub-id>
                        <pub-id pub-id-type="doi">10.3390/biology12081127</pub-id>
                    </mixed-citation>
                </ref>
            </ref-list>
        </back>
        <sub-article article-type="response" id="comment11411-251957">
            <front-stub>
                <contrib-group>
                    <contrib contrib-type="author">
                        <name>
                            <surname>Liang</surname>
                            <given-names>Ping</given-names>
                        </name>
                        <aff>Brock University, Canada</aff>
                    </contrib>
                </contrib-group>
                <author-notes>
                    <fn fn-type="conflict">
                        <p>
                            <bold>Competing interests: </bold>none</p>
                    </fn>
                </author-notes>
                <pub-date pub-type="epub">
                    <day>12</day>
                    <month>4</month>
                    <year>2024</year>
                </pub-date>
            </front-stub>
            <body>
                <p>Dear Dr. Marsano,</p>
                <p> </p>
                <p> Thank you very much for taking the effort to review our manuscript and for your overall positive review and constructive comments for improving the manuscript. We would like to offer our responses below to address the concerns you have raised and made some adjustments in our new version, and we would very much welcome your re-reading and approval.</p>
                <p> </p>
                <p> First, regarding your caution in our claiming retro-DNAs as &#x201c;a new type of retrotransposon&#x201d;, we would agree with you that many other types of DNAs, notably the mRNA of genes as processed pseudogenes or retrogenes, are not named as new types of retrotransposons in addition to LINEs/L1, SINEs/Alus and SVAs in the human genome. Very likely, we would agree with each other that a new type of retrotransposon (non-LTR specifically in our case), in addition to having the expected sequence features (i.e., polyA tail), would also have the ability to continue to retrotranspose like other non-autonomous non-LTR retrotransposons, such as Alus and SVAs, at least as a group, but not necessarily every individual copy. In other words, some of the retrotransposed copies (beyond the original copies) are able to be transcribed and retrotranspose. This extra feature would distinguish it from processed pseudogenes. Following this reasoning, we demonstrated that some of the retro-DNA copies have the capacity to transcribe or be transcribed and served as the parent copies of additional copies (Table 5 and Fig. 9A). Certainly, we realize the limitation of our data, namely the lack of experimental data to demonstrate the presence of such capacity in these copies, and ideally this is to be demonstrated by using the established retrotransposition assay (refs). However, this is beyond our reach as a bioinformatics-oriented research group, and it is our hope that by reporting such potential of these retro-DNA elements, interest can be stimulated by other groups to follow up with experimental verification. &#x00a0;Considering the limitation of our research data, we have adjusted the relevant point in our conclusion as &#x201c;retro-DNA could be established as a new type of non-LTR retrotransposons if their intrinsic L1-based transposition capacity can be experimentally approved.&#x201d;</p>
                <p> </p>
                <p> Second, regarding our use of testis tissues for expression analysis of these retro-DNAs, our rationale is related to the above point, namely their potential as parent copies of transmissible retro-DNAs, since they only need to be expressed in gamete cells for this to happen. It is true that a lot of genes show sporadic expression beyond their regular expression pattern in other differentiated tissue, likely attributed to the unique epigenetic profile during spermatogenesis. But this may likely also be the contributing factor for the generally higher levels of germline retrotransposition of all known retrotransposons over somatic retrotransposition, with the former responsible for all transmitted retrotransposition events observed in the genomes. For this reason, even though we did include a mixed-tissue sample and blood, with the latter as a widely available somatic tissue, in our expression analysis and showed a certain level of their expression, we thought it would be more meaningful to examine their expression in germline tissues. We would have liked to also include ovary as the female germline tissue, but it is unavailable across multiple species. For these reasons, we would like to argue that our focus on expression in testis tissue is justifiable. &#x00a0;If you still have some concerns, we would welcome some specific suggestions for improving our writing.</p>
                <p> </p>
                <p> Lastly, you mentioned that you could not see the supplementary files associated with the manuscript. However, I checked the link to the files, https://identifiers.org/biostudies:S-BSST1030, &#x00a0;under the &#x201c;Data availability&#x201d; section, and it is working properly. On this linked page, there are 3 listed files you can click and download or you can choose to download all files (a button below the file list). We wonder if you could check it again and let us know if you can access them properly. This is one of the public data repositories recommended by the journal.</p>
                <p> </p>
                <p> Thank you in advance for your second read and further comments or full approval.</p>
                <p> </p>
                <p> Sincerely,</p>
                <p> Ping Liang</p>
            </body>
        </sub-article>
        <sub-article article-type="response" id="comment11618-251957">
            <front-stub>
                <contrib-group>
                    <contrib contrib-type="author">
                        <name>
                            <surname>Liang</surname>
                            <given-names>Ping</given-names>
                        </name>
                        <aff>Brock University, Canada</aff>
                    </contrib>
                </contrib-group>
                <author-notes>
                    <fn fn-type="conflict">
                        <p>
                            <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                    </fn>
                </author-notes>
                <pub-date pub-type="epub">
                    <day>22</day>
                    <month>5</month>
                    <year>2024</year>
                </pub-date>
            </front-stub>
            <body>
                <p>Dear Dr. Marsano,</p>
                <p> </p>
                <p> We would like to apologize for not doing a revision that could be a bit more thorough in responding to your review comments, leaving many issues as pointed out by the second reviewer. Now we have done another revision, which we believe should have improved the written presentation and interpretation significantly. We encourage and would much appreciate your reading of our responses to reviewer 2's comments and version 3 of the paper, which should become available shortly.&#x00a0;</p>
                <p> </p>
                <p> We look forward to your further comments or full approval of the paper.</p>
                <p> </p>
                <p> Sincerely,</p>
                <p> Ping Liang</p>
            </body>
        </sub-article>
    </sub-article>
</article>
