<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.2 20190208//EN" "http://jats.nlm.nih.gov/publishing/1.2/JATS-journalpublishing1.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="other" dtd-version="1.2" xml:lang="en">
    <front>
        <journal-meta>
            <journal-id journal-id-type="pmc">F1000Research</journal-id>
            <journal-title-group>
                <journal-title>F1000Research</journal-title>
            </journal-title-group>
            <issn pub-type="epub">2046-1402</issn>
            <publisher>
                <publisher-name>F1000 Research Limited</publisher-name>
                <publisher-loc>London, UK</publisher-loc>
            </publisher>
        </journal-meta>
        <article-meta>
            <article-id pub-id-type="doi">10.12688/f1000research.170810.1</article-id>
            <article-categories>
                <subj-group subj-group-type="heading">
                    <subject>Software Tool Article</subject>
                </subj-group>
                <subj-group>
                    <subject>Articles</subject>
                </subj-group>
            </article-categories>
            <title-group>
                <article-title>DirectRepeateR: An R package for annotating direct repeats in genome assemblies</article-title>
                <fn-group content-type="pub-status">
                    <fn>
                        <p>[version 1; peer review: awaiting peer review]</p>
                    </fn>
                </fn-group>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Copeland</surname>
                        <given-names>Megan</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Conceptualization</role>
                    <role content-type="http://credit.niso.org/">Data Curation</role>
                    <role content-type="http://credit.niso.org/">Formal Analysis</role>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <role content-type="http://credit.niso.org/">Software</role>
                    <role content-type="http://credit.niso.org/">Visualization</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Original Draft Preparation</role>
                    <uri content-type="orcid">https://orcid.org/0009-0005-1397-6867</uri>
                    <xref ref-type="aff" rid="a1">1</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Barboza</surname>
                        <given-names>Andres</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Software</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a2">2</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Romanowski</surname>
                        <given-names>Joseph S.</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Conceptualization</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a2">2</xref>
                    <xref ref-type="aff" rid="a3">3</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Adelman</surname>
                        <given-names>Zach N.</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Conceptualization</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <uri content-type="orcid">https://orcid.org/0000-0001-5901-7171</uri>
                    <xref ref-type="aff" rid="a2">2</xref>
                    <xref ref-type="aff" rid="a3">3</xref>
                </contrib>
                <contrib contrib-type="author" corresp="yes">
                    <name>
                        <surname>Blackmon</surname>
                        <given-names>Heath</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Conceptualization</role>
                    <role content-type="http://credit.niso.org/">Formal Analysis</role>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <role content-type="http://credit.niso.org/">Project Administration</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Software</role>
                    <role content-type="http://credit.niso.org/">Supervision</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="corresp" rid="c1">a</xref>
                    <xref ref-type="aff" rid="a1">1</xref>
                    <xref ref-type="aff" rid="a2">2</xref>
                    <xref ref-type="aff" rid="a4">4</xref>
                </contrib>
                <aff id="a1">
                    <label>1</label>Biology, Texas A&amp;M University, College Station, Texas, 77843, USA</aff>
                <aff id="a2">
                    <label>2</label>Interdisciplinary Program in Genetics and Genomics, Texas A&amp;M University, College Station, Texas, 77843, USA</aff>
                <aff id="a3">
                    <label>3</label>Entomology, Texas A&amp;M University, College Station, Texas, 77843, USA</aff>
                <aff id="a4">
                    <label>4</label>Interdisciplinary Program in Ecology and Evolutionary Biology, Texas A&amp;M University, College Station, Texas, 77843, USA</aff>
            </contrib-group>
            <author-notes>
                <corresp id="c1">
                    <label>a</label>
                    <email xlink:href="mailto:blackmon@tamu.edu">blackmon@tamu.edu</email>
                </corresp>
                <fn fn-type="conflict">
                    <p>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>21</day>
                <month>10</month>
                <year>2025</year>
            </pub-date>
            <pub-date pub-type="collection">
                <year>2025</year>
            </pub-date>
            <volume>14</volume>
            <elocation-id>1147</elocation-id>
            <history>
                <date date-type="accepted">
                    <day>14</day>
                    <month>10</month>
                    <year>2025</year>
                </date>
            </history>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2025 Copeland M et al.</copyright-statement>
                <copyright-year>2025</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access article distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <self-uri content-type="pdf" xlink:href="https://f1000research.com/articles/14-1147/pdf"/>
            <abstract>
                <sec>
                    <title>Background</title>
                    <p>Direct repeats in close proximity are targets of the single-strand annealing (SSA) pathway, a mutagenic DNA repair process that impacts genome integrity. Understanding the evolution and consequences of these sequences is a critical part of understanding eukaryotic genome evolution.</p>
                </sec>
                <sec>
                    <title>Methods</title>
                    <p>We developed DirectRepeateR, an open-source R package that scans FASTA assemblies for exact, co-oriented repeats within a user-defined spacer window. We illustrate the utility of our software in an analysis of the 
                        <italic toggle="yes">Aedes aegypti</italic> genome by testing whether the distribution of direct repeats is consistent with selection acting against repeats in genic regions.</p>
                </sec>
                <sec>
                    <title>Results</title>
                    <p>Our results suggest that selection has acted against direct repeats that flank or overlap with protein-coding DNA sequences.</p>
                </sec>
                <sec>
                    <title>Conclusion</title>
                    <p>Our software provides an accurate, computationally efficient, user-friendly, and tailorable approach for detecting direct repeats.</p>
                </sec>
            </abstract>
            <kwd-group kwd-group-type="author">
                <kwd>Direct repeats</kwd>
                <kwd>DNA repair mechanisms</kwd>
                <kwd>R package</kwd>
                <kwd>repeat annotation</kwd>
            </kwd-group>
            <funding-group>
                <award-group id="fund-1">
                    <funding-source>National Institute of Allergy and Infectious Diseases, National Institutes of Health</funding-source>
                    <award-id>AI148787</award-id>
                </award-group>
                <award-group id="fund-2">
                    <funding-source>National Institute of General Medical Sciences at the National Institutes of Health</funding-source>
                    <award-id>R35GM138098</award-id>
                </award-group>
                <funding-statement>MC, AB, and HB were supported by the National Institute of General Medical Sciences at the National Institutes of Health (R35GM138098). JSR and ZNA were supported by the National Institute of Allergy and Infectious Diseases, National Institutes of Health (AI148787 to ZNA).</funding-statement>
                <funding-statement>
                    <italic>The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript.</italic>
                </funding-statement>
            </funding-group>
        </article-meta>
    </front>
    <body>
        <sec id="sec5" sec-type="intro">
            <title>Introduction</title>
            <p>The discovery of repetitive DNA sequences through reassociation experiments marked a foundational moment in genome biology.
                <sup>
                    <xref ref-type="bibr" rid="ref1">1</xref>
                </sup> However, the annotation of repetitive elements remains technically challenging, and despite their widespread presence, our understanding of their functional impacts continues to lag behind that of more well-characterized genomic features. In part, repeat annotation is hampered by the sheer diversity of repetitive sequences. Some repeats occur as long tandem arrays and others as dispersed copies, and they span a wide range of sizes, sequence identities, and evolutionary origins.
                <sup>
                    <xref ref-type="bibr" rid="ref2">2</xref>
                </sup>
            </p>
            <p>These repeat architectures have variable impacts on genomes and are now recognized for their roles in genome stability/instability, evolution, and disease.
                <sup>
                    <xref ref-type="bibr" rid="ref3">3</xref>&#x2013;
                    <xref ref-type="bibr" rid="ref5">5</xref>
                </sup> Repeats can influence mutation and recombination rates,
                <sup>
                    <xref ref-type="bibr" rid="ref6">6</xref>,
                    <xref ref-type="bibr" rid="ref7">7</xref>
                </sup> and their expansion/contraction underlie human disorders like Huntington&#x2019;s disease and fragile-X syndrome.
                <sup>
                    <xref ref-type="bibr" rid="ref8">8</xref>
                </sup> Additionally, microsatellites have been shown to evolve rapidly and vary considerably in abundance even among closely related taxa, underscoring their dynamic evolution
                <sup>
                    <xref ref-type="bibr" rid="ref9">9</xref>
                </sup>; they also contribute to epigenetic regulation and can influence gene expression by activating promoters or serving as transcription factor binding sites.
                <sup>
                    <xref ref-type="bibr" rid="ref10">10</xref>,
                    <xref ref-type="bibr" rid="ref11">11</xref>
                </sup>
            </p>
            <p>Proximal direct repeat pairs are of particular interest. These are two identical sequences in the same orientation, separated by a spacer region. In both bacteria and mammals, the distribution and abundance of long direct repeats appear to be shaped to minimize genome instability, with constrained chromosomal positioning in bacteria
                <sup>
                    <xref ref-type="bibr" rid="ref12">12</xref>
                </sup> and a potential role for natural selection in reducing repeat-mediated mutagenesis in the mitochondrial DNA of longer-lived mammals.
                <sup>
                    <xref ref-type="bibr" rid="ref13">13</xref>
                </sup> This mutagenic potential is also evident at the molecular level, where direct repeat pairs facilitate the DNA repair pathway single-strand annealing (SSA) (
                <xref ref-type="fig" rid="f1">Figure 1</xref>). After a double-strand break occurs, the 5&#x2032; ends of each strand are resected, and complementary repeats are used to anneal the strands. The repair mechanism is considered mutagenic because the intervening DNA between repeats and the downstream repeat are lost during the repair process.
                <sup>
                    <xref ref-type="bibr" rid="ref14">14</xref>
                </sup>
            </p>
            <fig fig-type="figure" id="f1" orientation="portrait" position="float">
                <label>
Figure 1. </label>
                <caption>
                    <title>Single-strand annealing during DNA repair.</title>
                    <p>This pathway is initiated by a double-strand break between homologous DNA repeats followed by the resection of the 5' end strands, creating 3' overhangs. These overhangs find and anneal to complementary sequences, resulting in the loss of intervening DNA and the downstream repeat. The final step is ligation, where the DNA strands are rejoined, completing the repair process. Created in BioRender. Copeland, M. (2025) 
                        <ext-link ext-link-type="uri" xlink:href="https://BioRender.com/d9g3lm9">https://BioRender.com/d9g3lm9</ext-link>.</p>
                </caption>
                <graphic id="gr1" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/188317/d9ca97c5-9a04-4550-bec6-7e2b7df83a5e_figure1.gif"/>
            </fig>
            <p>Tools such as RepeatMasker,
                <sup>
                    <xref ref-type="bibr" rid="ref15">15</xref>
                </sup> HelitronScanner,
                <sup>
                    <xref ref-type="bibr" rid="ref16">16</xref>
                </sup> 
                <italic toggle="yes">ab initio</italic> programmes like PILER,
                <sup>
                    <xref ref-type="bibr" rid="ref17">17</xref>
                </sup> and combinatorial pipelines such as RepeatModeler
                <sup>
                    <xref ref-type="bibr" rid="ref18">18</xref>
                </sup> are widely used to perform repeat annotation or classification. More recently, Repeater
                <sup>
                    <xref ref-type="bibr" rid="ref19">19</xref>
                </sup> introduced fast, alignment-free profiling of diverse repeat classes and produces informative whole-chromosome visualisations. While these tools are well suited for identifying broad repeat families or consensus-based repeat structures, they are not able to pinpoint exact, spatially localized repeat pairs and provide detailed annotation information for the repeats.</p>
            <p>Here we present DirectRepeateR, an open-source R package that fills this gap. DirectRepeateR performs de novo identification of exact direct repeat pairs within a user-specified distance and immediately returns analysis-ready CSV files or optional GFF files. DirectRepeateR is implemented entirely in R, and all user interaction remains within the R environment, making it easily accessible to researchers with minimal coding experience. DirectRepeateR offers a complementary approach by focusing specifically on the identification of exact direct repeat pairs at user-defined spatial and length resolutions. This fine-scale, length-consistent detection is particularly useful for studying localized structural features, such as repeat-mediated recombination or deletion events.</p>
            <p>To illustrate its use, we map direct repeat pairs in the 
                <italic toggle="yes">Aedes aegypti</italic> genome and, using a Monte Carlo simulation, test whether the distribution of direct repeat pairs in 
                <italic toggle="yes">Aedes aegypti</italic> is consistent with selection against their presence flanking exons.</p>
        </sec>
        <sec id="sec6" sec-type="methods">
            <title>Methods</title>
            <sec id="sec7">
                <title>Implementation</title>
                <p>

                    <italic toggle="yes">The DirectRepeateR package</italic>
                </p>
                <p>DirectRepeateR is an R package designed to identify, annotate, and visualize nearby direct repeat sequences within genome assemblies. It offers a flexible solution for detecting direct repeats using a 
                    <italic toggle="yes">de novo</italic> approach, making it useful for model and non-model organisms. This package features three functions: 
                    <monospace>GetRepeats</monospace>, 
                    <monospace>ConvertToGFF</monospace>, and 
                    <monospace>PlotRepeats</monospace>. We made a vignette that provides guidance on how to use each function (S1).</p>
                <p>

                    <italic toggle="yes">GetRepeats function</italic>
                </p>
                <p>The function 
                    <monospace>GetRepeats</monospace> is designed to identify direct repeat sequences from genome assemblies, leveraging R for a simple user interface and C++ for efficient processing. While this package incorporates a C++ backend for performance, all user interaction takes place through the R interface, keeping the software user-friendly and accessible to researchers with basic R skills. The function takes a genome assembly in FASTA format as the input along with the parameters 
                    <monospace>query_length</monospace> (length of the substring used to search for repeats), 
                    <monospace>maxdist</monospace> (the window size in which we search for repeats), and 
                    <monospace>minlength</monospace> (the minimum length to be considered a repeat). These parameters allow user control over the query sequence length, the maximum distance between repeat copies, and the minimum repeat length, respectively. If these parameters are not provided, default values are used (
                    <monospace>query_length</monospace> = 25, 
                    <monospace>maxdist</monospace> = 20,000, and 
                    <monospace>minlength</monospace> = 50). These default settings are based on common expectations about the repeat structures targeted by the SSA pathway.
                    <sup>
                        <xref ref-type="bibr" rid="ref14">14</xref>
                    </sup>
                </p>
                <p>The function begins by using the C++ backend function through the 
                    <monospace>Rcpp</monospace> package.
                    <sup>
                        <xref ref-type="bibr" rid="ref20">20</xref>
                    </sup> For each sequence in the FASTA file, this C++ function extracts chromosome lengths and names from the genome and uses these to organize the repeat information. The implemented algorithm is based on sliding across the genome in steps equal to the query length. For each query, the algorithm searches for all exact matches within the range defined by 
                    <monospace>maxdist</monospace>, representing the maximum allowable distance between repeat copies. This process is repeated until the end of the sequence is reached. When matches are found, the start and end positions of the first element and the match are recorded. As this process is completed for each sequence in the FASTA file, data frames containing the position information are written out as temporary chromosomal CSV files. After the C++ routine completes, 
                    <monospace>GetRepeats</monospace> processes all the generated chromosomal CSV files inside of R. These temporary files are processed using the 
                    <monospace>minlength</monospace> argument and are combined to provide the final comprehensive list of detected direct repeats, including their start and end positions along with the positions of their corresponding matches.</p>
                <p>

                    <italic toggle="yes">ConvertToGFF function</italic>
                </p>
                <p>The 
                    <monospace>ConvertToGFF</monospace> function is designed to convert repeat data into a GFF (General Feature Format) file, a standard format for describing genes and other features in genomes. The user supplies the data frame returned by the 
                    <monospace>GetRepeats</monospace> function, which contains the identified direct repeats, and the function works by preallocating vectors to store the GFF entries, including fields for chromosomes, source, feature types (repeat_region for the full length and repeat_unit for individual copies), start and end positions, and attributes. Each repeat in the data frame is processed to generate three GFF entries: one for the full repeat region and two for the individual elements of the repeat.</p>
                <p>

                    <italic toggle="yes">PlotRepeats function</italic>
                </p>
                <p>The 
                    <monospace>PlotRepeats</monospace> function uses the 
                    <monospace>ggplot2</monospace> package
                    <sup>
                        <xref ref-type="bibr" rid="ref21">21</xref>
                    </sup> and generates visualizations of repeat densities across chromosomes using a sliding window approach. This function uses the data generated by the 
                    <monospace>GetRepeats</monospace> function and allows users to specify window and step sizes (defaulting to 200 Kb for both if not provided). The function processes each chromosome by first calculating the midpoint of each repeat in the file and then sliding windows across the chromosome length. For each window, it counts the number of repeats in the window. It then generates a plot of repeat density along the chromosome.</p>
            </sec>
            <sec id="sec8">
                <title>Operation</title>
                <p>The DirectRepeateR package requires 
                    <monospace>Rcpp</monospace> (&gt;= 1.1.0), and therefore requires a version of R &gt;= 4.4.1 (available from 
                    <ext-link ext-link-type="uri" xlink:href="http://www.r-project.org">www.r-project.org</ext-link>). Users can use the 
                    <monospace>devtools</monospace> package to install DirectRepeateR from GitHub (
                    <ext-link ext-link-type="uri" xlink:href="https://github.com/coleoguy/DirectRepeateR">https://github.com/coleoguy/DirectRepeateR</ext-link>).</p>
            </sec>
            <sec id="sec9">
                <title>Calculation of observed vs. expected counts of Flanked Exons</title>
                <p>To illustrate the use of our package, we analyzed the 
                    <italic toggle="yes">A. aegypti</italic> genome (GCF_002204515.2) with DirectRepeateR. The 
                    <italic toggle="yes">A. aegypti</italic> genome is approximately 1.3 Gb in size, with 65% being repetitive content.
                    <sup>
                        <xref ref-type="bibr" rid="ref22">22</xref>
                    </sup> We used the DirectRepeateR package to map the location of direct repeats within this genome. For this, we define direct repeats as sequences that have a minimum element size of 50 bp, are oriented in the same direction, and are separated by no more than 20 Kb.</p>
                <p>We then used the output from DirectRepeateR to explore whether the distribution of direct repeats was consistent with the hypothesis that selection should limit their proximity to exons. For this project, we considered an exon flanked if it was either spanned or overlapped by direct repeats. Essentially, any orientation where single-strand annealing would be expected to lead to the loss of exonic DNA was considered a flanking arrangement.</p>
                <p>We calculated the observed and expected numbers of exons flanked by direct repeats in the 
                    <italic toggle="yes">A. aegypti</italic> genome. To get the observed counts, we used the direct repeat information produced by DirectRepeateR and the annotation file from NCBI (GCF_002204515.2). The GFF file was filtered to retain only unique protein-coding exons, and these exon locations were then compared with repeat locations to identify exons flanked by repeats. To determine expected counts, we employed a Monte Carlo simulation to create a null distribution of the number of flanked exons. We generated direct repeat positions by randomly sampling locations based on the length of each chromosome while maintaining the size of each repeat copy and the distance between copies. When the gap between repeats was under 26 bp, the subsequent repeat was placed by offsetting from the repositioned end of the prior repeat by the same gap, maintaining clustering. After positioning the repeats randomly, we used the same method described above to evaluate the number of flanked exons. This process was repeated 100 times to generate a null distribution. While not included in the DirectRepeateR package, the scripts used for this analysis are provided in a GitHub repository for researchers to use and adapt for their own needs. AI was used to revise software and analysis code to maximize efficiency.</p>
            </sec>
        </sec>
        <sec id="sec10" sec-type="results">
            <title>Results</title>
            <sec id="sec11">
                <title>Performance</title>
                <p>We assessed the runtime of our repeat detection algorithm across a range of species with varying genome sizes. We used the genomes of 
                    <italic toggle="yes">Caenorhabditis elegans</italic> (GCF_000002985.6), 
                    <italic toggle="yes">Anopheles gambiae</italic> (GCF_943734735.2), 
                    <italic toggle="yes">Vitis vinifera</italic> (GCF_030704535.1), 
                    <italic toggle="yes">Oryzias latipes</italic> (GCF_002234675.1), 
                    <italic toggle="yes">Gallus gallus</italic> (GCF_016699485.2), 
                    <italic toggle="yes">Aedes aegypti</italic> (GCF_002204515.2), <italic toggle="yes">Mus musculus</italic> (GCF_000001635.27), and 
                    <italic toggle="yes">Homo sapiens</italic> (GCF_000001405.40), which span genome sizes from 100.3 Mb to over 3.1 Gb (
                    <xref ref-type="fig" rid="f2">Figure 2</xref>). Runtime scaled linearly with genome size (slope = 0.0677, R
                    <sup>2</sup> = 0.96, p = 2.33 &#x00d7; 10
                    <sup>&#x2212;5</sup>), indicating a strong relationship with minimal residual variation. This supports the consistent performance of the method across a wide range of genome sizes. Smaller genomes, such as 
                    <italic toggle="yes">C. elegans</italic> (100.3 Mb), completed in under five minutes, while the largest genome, 
                    <italic toggle="yes">H. sapiens</italic> (3.1 Gb), required approximately 3.6 hours.</p>
                <fig fig-type="figure" id="f2" orientation="portrait" position="float">
                    <label>
Figure 2. </label>
                    <caption>
                        <title>Runtime of the repeat detection algorithm across genomes of varying sizes.</title>
                        <p>The plot shows the relationship between genome size (in megabases, Mb) and runtime (in minutes) for eight species. The blue trend line indicates linear regression (R
                            <sup>2</sup> = 0.96).</p>
                    </caption>
                    <graphic id="gr2" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/188317/d9ca97c5-9a04-4550-bec6-7e2b7df83a5e_figure2.gif"/>
                </fig>
            </sec>
            <sec id="sec12">
                <title>Observed vs. expected counts of Flanked Exons</title>
                <p>In the 
                    <italic toggle="yes">A. aegypti</italic> genome, we observed that 5,782 out of 80,498 exons were flanked by direct repeats. The expected number, based on simulations, was calculated as just under 40,000, highlighting a significant deviation from what our null model predicted (
                    <xref ref-type="fig" rid="f3">Figure 3</xref>). When we analyzed the data by chromosome, chromosome 1 had 1,247 flanked exons, chromosome 2 had 2,427, and chromosome 3 had 2,108 (
                    <xref ref-type="fig" rid="f3">Figure 3</xref>). In all cases, we found that observed values were markedly lower than the expected numbers, with the simulations predicting nearly 14,000 flanked exons for chromosomes 2 and 3, and approximately 10,000 for chromosome 1. These results reveal a consistent pattern across the genome, where the number of exons flanked by direct repeats is far below random expectation.</p>
                <fig fig-type="figure" id="f3" orientation="portrait" position="float">
                    <label>
Figure 3. </label>
                    <caption>
                        <title>Empirical vs. null counts of flanked exons in the 
                            <italic toggle="yes">Aedes aegypti</italic> genome.</title>
                        <p>The null distribution (colored points) represents the expected number of flanked exons under a random model, while the empirical values (colored points with black border) show the actual observed counts. Each chromosome and the genome-wide totals are presented separately. The left y-axis (blue) corresponds to chromosome-level comparisons, and the right y-axis (orange) represents genome-wide values.</p>
                    </caption>
                    <graphic id="gr3" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/188317/d9ca97c5-9a04-4550-bec6-7e2b7df83a5e_figure3.gif"/>
                </fig>
                <p>To investigate the relationship between gene density and repeat content, we analyzed the distribution of genes and direct repeats across 200 Kb windows along three chromosomes of 
                    <italic toggle="yes">Aedes aegypti.</italic> For each chromosome, gene and direct repeat counts were calculated within each sliding window. To visualize the data, we created a scatterplot of repeat counts on a log scale against gene counts. Fitting a linear model to this data showed a significant negative relationship (slope = -10.34, R
                    <sup>2</sup> = 0.006, p-value = 1.9e-09), suggesting that regions with higher gene densities tend to have lower numbers of repeats (
                    <xref ref-type="fig" rid="f4">Figure 4</xref>).</p>
                <fig fig-type="figure" id="f4" orientation="portrait" position="float">
                    <label>
Figure 4. </label>
                    <caption>
                        <title>Scatterplot of repeat counts against gene counts in 200 Kb windows across the 
                            <italic toggle="yes">Aedes aegypti</italic> genome.</title>
                        <p>To improve visualization, repeat counts were log-transformed prior to plotting; however, the vertical axis was back-transformed to ensure values remain directly interpretable. Each point represents a window, with the horizontal axis showing the gene count per window and the vertical axis showing the repeat count. The blue trend line indicates linear regression (R
                            <sup>2</sup> = 0.006).</p>
                    </caption>
                    <graphic id="gr4" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/188317/d9ca97c5-9a04-4550-bec6-7e2b7df83a5e_figure4.gif"/>
                </fig>
            </sec>
        </sec>
        <sec id="sec13" sec-type="discussion">
            <title>Discussion</title>
            <sec id="sec14">
                <title>Purifying selection on repeats</title>
                <p>Our analysis shows that exons are flanked by proximal direct repeat pairs less frequently than expected in 
                    <italic toggle="yes">A. aegypti</italic>, and repeat density decreases in gene-rich regions. Since direct repeats can cause deletions via the SSA pathway, their depletion near genes is likely due to purifying selection acting to minimize mutational risk. Across multiple species, purifying selection appears to act on repeats. Studies have suggested that selection may limit TE accumulation near genes,
                    <sup>
                        <xref ref-type="bibr" rid="ref23">23</xref>
                    </sup> restrict TEs to AT-rich, gene-poor regions,
                    <sup>
                        <xref ref-type="bibr" rid="ref24">24</xref>
                    </sup> and constrain microsatellite variation due to its potential to disrupt gene function.
                    <sup>
                        <xref ref-type="bibr" rid="ref25">25</xref>
                    </sup> Together, these findings support the view that purifying selection acts broadly across genome landscapes to restrict repeats from genic regions where their expansion or instability would be most harmful.</p>
            </sec>
            <sec id="sec15">
                <title>Limitations &amp; Future work</title>
                <p>The current version of DirectRepeateR is intentionally streamlined for one very specific task&#x2014;detecting exact, proximal direct repeat pairs. As such, several broader repeat-analysis features are outside its present scope. First, the algorithm currently searches for perfect matches in the same 5&#x2032;&#x2192;3&#x2032; orientation. The package does not yet identify degenerate, inverted, or mirror repeats. Additionally, DirectRepeateR identifies exact matches by stepping across the genome in increments equal to the specified query length (e.g., 25 base pairs), which can result in missing some portion of the start or end of a repeat if the window does not align perfectly with the repeat&#x2019;s boundaries. Specifically, this approach may miss anywhere from zero base pairs to the query length minus one at both the beginning and the end of each repeat. The number of base pairs lost from each repeat is uniformly distributed, given that repeats can start at any arbitrary position along the genome. This limitation creates a trade-off between runtime and completeness. A smaller query length increases the number of iterations and runtime but improves completeness by aligning the window more accurately with repeat boundaries, reducing missed base pairs. A larger query length decreases runtime by reducing iterations but may miss more base pairs, compromising completeness. These constraints reflect deliberate design choices that prioritize computational efficiency and ease of use for direct repeat studies, but they also highlight clear avenues for future development, most notably adding support for near-identical repeats and inverted orientations.</p>
                <p>DirectRepeateR offers a targeted and accessible approach for identifying exact, proximal direct repeat pairs in genome assemblies. Researchers can fine-tune resolution and spatial sensitivity using adjustable parameters, such as maximum repeat distance, to detect repeat structures relevant to genome stability. Fully implemented in R, the package provides a lightweight interface suitable for users with minimal coding experience. DirectRepeateR complements existing repeat-annotation tools by filling a practical gap in the detection of spatially localized, length-consistent repeat pairs.</p>
            </sec>
        </sec>
        <sec id="sec16">
            <title>Code availability</title>
            <p>Code for analysis available from: 
                <ext-link ext-link-type="uri" xlink:href="https://github.com/coleoguy/DRproj">https://github.com/coleoguy/DRproj</ext-link>
                <sup>
                    <xref ref-type="bibr" rid="ref26">26</xref>
                </sup>
            </p>
            <p>Archived source at time of publication: 10.5281/zenodo.17073598</p>
            <p>This project contains:
                <list list-type="bullet">
                    <list-item>
                        <label>&#x2022;</label>
                        <p>scripts/* (R scripts for repeat annotation and observed flanking exon analysis)</p>
                    </list-item>
                    <list-item>
                        <label>&#x2022;</label>
                        <p>results/* (Outputs from the R scripts including repeat counts and flanked exon results)</p>
                    </list-item>
                    <list-item>
                        <label>&#x2022;</label>
                        <p>figures/* (Visualizations generated for the manuscript)</p>
                    </list-item>
                </list>
            </p>
            <p>License: 
                <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution 4.0 International</ext-link> (CC-BY 4.0)</p>
        </sec>
        <sec id="sec17">
            <title>Software availability</title>
            <p>Source code available from: 
                <ext-link ext-link-type="uri" xlink:href="https://github.com/coleoguy/DirectRepeateR">https://github.com/coleoguy/DirectRepeateR</ext-link>
                <sup>
                    <xref ref-type="bibr" rid="ref27">27</xref>
                </sup>
            </p>
            <p>Archived source at time of publication: 10.5281/zenodo.16275409</p>
            <p>License: MIT (an OSI-approved open-source license)</p>
        </sec>
    </body>
    <back>
        <sec id="sec21" sec-type="data-availability">
            <title>Data availability</title>
            <sec id="sec22">
                <title>Underlying data</title>
                <p>Reference genomes and annotation files used for repeat annotation analysis were downloaded from NCBI:
                    <list list-type="bullet">
                        <list-item>
                            <label>&#x2022;</label>
                            <p>

                                <italic toggle="yes">Aedes aegypti</italic>: GCF_002204515.2</p>
                        </list-item>
                    </list>
                </p>
            </sec>
        </sec>
        <ack>
            <title>Acknowledgements</title>
            <p>Not applicable.</p>
        </ack>
        <ref-list>
            <title>References</title>
            <ref id="ref1">
                <label>1</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Britten</surname>
                            <given-names>RJ</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Kohne</surname>
                            <given-names>DE</given-names>
                        </name>
</person-group>:
                    <article-title>Repeated sequences in DNA. Hundreds of thousands of copies of DNA sequences have been incorporated into the genomes of higher organisms.</article-title>
                    <source>

                        <italic toggle="yes">Science.</italic>
</source>
                    <year>1968</year>;<volume>161</volume>:<fpage>529</fpage>&#x2013;<lpage>540</lpage>.
                    <pub-id pub-id-type="doi">10.1126/science.161.3841.529</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref2">
                <label>2</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Liao</surname>
                            <given-names>X</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Zhu</surname>
                            <given-names>W</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Zhou</surname>
                            <given-names>J</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Repetitive DNA sequence detection and its role in the human genome.</article-title>
                    <source>

                        <italic toggle="yes">Commun Biol.</italic>
</source>
                    <year>2023</year>;<volume>6</volume>:<fpage>954</fpage>.
                    <pub-id pub-id-type="pmid">37726397</pub-id>
                    <pub-id pub-id-type="doi">10.1038/s42003-023-05322-y</pub-id>
                    <pub-id pub-id-type="pmcid">PMC10509279</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref3">
                <label>3</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Palazzo</surname>
                            <given-names>AF</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Gregory</surname>
                            <given-names>TR</given-names>
                        </name>
</person-group>:
                    <article-title>The case for junk DNA.</article-title>
                    <source>

                        <italic toggle="yes">PLoS Genet.</italic>
</source>
                    <year>2014</year>;<volume>10</volume>:<fpage>e1004351</fpage>.
                    <pub-id pub-id-type="pmid">24809441</pub-id>
                    <pub-id pub-id-type="doi">10.1371/journal.pgen.1004351</pub-id>
                    <pub-id pub-id-type="pmcid">PMC4014423</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref4">
                <label>4</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Neguembor</surname>
                            <given-names>MV</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Gabellini</surname>
                            <given-names>D</given-names>
                        </name>
</person-group>:
                    <article-title>In junk we trust: repetitive DNA, epigenetics and facioscapulohumeral muscular dystrophy.</article-title>
                    <source>

                        <italic toggle="yes">Epigenomics.</italic>
</source>
                    <year>2010</year>;<volume>2</volume>:<fpage>271</fpage>&#x2013;<lpage>287</lpage>.
                    <pub-id pub-id-type="pmid">22121874</pub-id>
                    <pub-id pub-id-type="doi">10.2217/epi.10.8</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref5">
                <label>5</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Makalowski</surname>
                            <given-names>W</given-names>
                        </name>
</person-group>:
                    <article-title>Not Junk After All.</article-title>
                    <source>

                        <italic toggle="yes">Science.</italic>
</source>
                    <year>2003</year>;<volume>300</volume>:<fpage>1246</fpage>&#x2013;<lpage>1247</lpage>.
                    <pub-id pub-id-type="doi">10.1126/science.1085690</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref6">
                <label>6</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Ellegren</surname>
                            <given-names>H</given-names>
                        </name>
</person-group>:
                    <article-title>Heterogeneous mutation processes in human microsatellite DNA sequences.</article-title>
                    <source>

                        <italic toggle="yes">Nat. Genet.</italic>
</source>
                    <year>2000</year>;<volume>24</volume>:<fpage>400</fpage>&#x2013;<lpage>402</lpage>.
                    <pub-id pub-id-type="pmid">10742106</pub-id>
                    <pub-id pub-id-type="doi">10.1038/74249</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref7">
                <label>7</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Wahls</surname>
                            <given-names>WP</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Wallace</surname>
                            <given-names>LJ</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Moore</surname>
                            <given-names>PD</given-names>
                        </name>
</person-group>:
                    <article-title>The Z-DNA motif d(TG)30 promotes reception of information during gene conversion events while stimulating homologous recombination in human cells in culture.</article-title>
                    <source>

                        <italic toggle="yes">Mol. Cell. Biol.</italic>
</source>
                    <year>1990</year>;<volume>10</volume>:<fpage>785</fpage>&#x2013;<lpage>793</lpage>.
                    <pub-id pub-id-type="pmid">2405255</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref8">
                <label>8</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Mirkin</surname>
                            <given-names>SM</given-names>
                        </name>
</person-group>:
                    <article-title>DNA structures, repeat expansions and human hereditary disorders.</article-title>
                    <source>

                        <italic toggle="yes">Curr. Opin. Struct. Biol.</italic>
</source>
                    <year>2006</year>;<volume>16</volume>:<fpage>351</fpage>&#x2013;<lpage>358</lpage>.
                    <pub-id pub-id-type="doi">10.1016/j.sbi.2006.05.004</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref9">
                <label>9</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Jonika</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Lo</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Blackmon</surname>
                            <given-names>H</given-names>
                        </name>
</person-group>:
                    <article-title>Mode and Tempo of Microsatellite Evolution across 300 Million Years of Insect Evolution.</article-title>
                    <source>

                        <italic toggle="yes">Genes.</italic>
</source>
                    <year>2020</year>;<volume>11</volume>:<fpage>945</fpage>.
                    <pub-id pub-id-type="pmid">32824315</pub-id>
                    <pub-id pub-id-type="doi">10.3390/genes11080945</pub-id>
                    <pub-id pub-id-type="pmcid">PMC7464534</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref10">
                <label>10</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Gebhardt</surname>
                            <given-names>F</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Z&#x00e4;nker</surname>
                            <given-names>KS</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Brandt</surname>
                            <given-names>B</given-names>
                        </name>
</person-group>:
                    <article-title>Modulation of epidermal growth factor receptor gene transcription by a polymorphic dinucleotide repeat in intron 1.</article-title>
                    <source>

                        <italic toggle="yes">J. Biol. Chem.</italic>
</source>
                    <year>1999</year>;<volume>274</volume>:<fpage>13176</fpage>&#x2013;<lpage>13180</lpage>.
                    <pub-id pub-id-type="pmid">10224073</pub-id>
                    <pub-id pub-id-type="doi">10.1074/jbc.274.19.13176</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref11">
                <label>11</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Contente</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Dittmer</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Koch</surname>
                            <given-names>MC</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>A polymorphic microsatellite that mediates induction of PIG3 by p53.</article-title>
                    <source>

                        <italic toggle="yes">Nat. Genet.</italic>
</source>
                    <year>2002</year>;<volume>30</volume>:<fpage>315</fpage>&#x2013;<lpage>320</lpage>.
                    <pub-id pub-id-type="pmid">11919562</pub-id>
                    <pub-id pub-id-type="doi">10.1038/ng836</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref12">
                <label>12</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Malhotra</surname>
                            <given-names>N</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Seshasayee</surname>
                            <given-names>ASN</given-names>
                        </name>
</person-group>:
                    <article-title>Replication-dependent organization constrains positioning of long DNA repeats in bacterial genomes.</article-title>
                    <source>

                        <italic toggle="yes">Genome Biol. Evol.</italic>
</source>
                    <year>2022</year>;<volume>14</volume>:<fpage>evac102</fpage>.
                    <pub-id pub-id-type="pmid">35776426</pub-id>
                    <pub-id pub-id-type="doi">10.1093/gbe/evac102</pub-id>
                    <pub-id pub-id-type="pmcid">PMC9297083</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref13">
                <label>13</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Khaidakov</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Siegel</surname>
                            <given-names>ER</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Shmookler Reis</surname>
                            <given-names>RJ</given-names>
                        </name>
</person-group>:
                    <article-title>Direct repeats in mitochondrial DNA and mammalian lifespan.</article-title>
                    <source>

                        <italic toggle="yes">Mech. Ageing Dev.</italic>
</source>
                    <year>2006</year>;<volume>127</volume>:<fpage>808</fpage>&#x2013;<lpage>812</lpage>.
                    <pub-id pub-id-type="pmid">16956646</pub-id>
                    <pub-id pub-id-type="doi">10.1016/j.mad.2006.07.008</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref14">
                <label>14</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Bhargava</surname>
                            <given-names>R</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Onyango</surname>
                            <given-names>DO</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Stark</surname>
                            <given-names>JM</given-names>
                        </name>
</person-group>:
                    <article-title>Regulation of Single-Strand Annealing and its Role in Genome Maintenance.</article-title>
                    <source>

                        <italic toggle="yes">Trends Genet.</italic>
</source>
                    <year>2016</year>;<volume>32</volume>:<fpage>566</fpage>&#x2013;<lpage>575</lpage>.
                    <pub-id pub-id-type="pmid">27450436</pub-id>
                    <pub-id pub-id-type="doi">10.1016/j.tig.2016.06.007</pub-id>
                    <pub-id pub-id-type="pmcid">PMC4992407</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref15">
                <label>15</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Nishimura</surname>
                            <given-names>D</given-names>
                        </name>
</person-group>:
                    <article-title>RepeatMasker.</article-title>
                    <source>

                        <italic toggle="yes">Biotech Software &amp; Internet Report.</italic>
</source>
                    <year>2000</year>;<volume>1</volume>:<fpage>36</fpage>&#x2013;<lpage>39</lpage>.
                    <pub-id pub-id-type="doi">10.1089/152791600319259</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref16">
                <label>16</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Xiong</surname>
                            <given-names>W</given-names>
                        </name>

                        <name name-style="western">
                            <surname>He</surname>
                            <given-names>L</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Lai</surname>
                            <given-names>J</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>HelitronScanner uncovers a large overlooked cache of 
                        <italic toggle="yes">Helitron</italic> transposons in many plant genomes.</article-title>
                    <source>

                        <italic toggle="yes">Proc. Natl. Acad. Sci. USA.</italic>
</source>
                    <year>2014</year>;<volume>111</volume>:<fpage>10263</fpage>&#x2013;<lpage>10268</lpage>.
                    <pub-id pub-id-type="pmid">24982153</pub-id>
                    <pub-id pub-id-type="doi">10.1073/pnas.1410068111</pub-id>
                    <pub-id pub-id-type="pmcid">PMC4104883</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref17">
                <label>17</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Edgar</surname>
                            <given-names>RC</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Myers</surname>
                            <given-names>EW</given-names>
                        </name>
</person-group>:
                    <article-title>PILER: identification and classification of genomic repeats.</article-title>
                    <source>

                        <italic toggle="yes">Bioinformatics.</italic>
</source>
                    <year>2005</year>;<volume>21 Suppl 1</volume>:<fpage>i152</fpage>&#x2013;<lpage>i158</lpage>.
                    <pub-id pub-id-type="pmid">15961452</pub-id>
                    <pub-id pub-id-type="doi">10.1093/bioinformatics/bti1003</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref18">
                <label>18</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Flynn</surname>
                            <given-names>JM</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Hubley</surname>
                            <given-names>R</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Goubert</surname>
                            <given-names>C</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>RepeatModeler2 for automated genomic discovery of transposable element families.</article-title>
                    <source>

                        <italic toggle="yes">Proc. Natl. Acad. Sci. USA.</italic>
</source>
                    <year>2020</year>;<volume>117</volume>:<fpage>9451</fpage>&#x2013;<lpage>9457</lpage>.
                    <pub-id pub-id-type="pmid">32300014</pub-id>
                    <pub-id pub-id-type="doi">10.1073/pnas.1921046117</pub-id>
                    <pub-id pub-id-type="pmcid">PMC7196820</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref19">
                <label>19</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Kalendar</surname>
                            <given-names>R</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Kairov</surname>
                            <given-names>U</given-names>
                        </name>
</person-group>:
                    <article-title>Genome-wide tool for sensitive de novo identification and visualisation of interspersed and tandem repeats.</article-title>
                    <source>

                        <italic toggle="yes">Bioinform Biol Insights.</italic>
</source>
                    <year>2024</year>;<volume>18</volume>:<fpage>11779322241306391</fpage>.
                    <pub-id pub-id-type="pmid">39703748</pub-id>
                    <pub-id pub-id-type="doi">10.1177/11779322241306391</pub-id>
                    <pub-id pub-id-type="pmcid">PMC11656428</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref20">
                <label>20</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Eddelbuettel</surname>
                            <given-names>D</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Balamuta</surname>
                            <given-names>JJ</given-names>
                        </name>
</person-group>:
                    <article-title>Extending R with C++: A brief introduction to Rcpp.</article-title>
                    <source>

                        <italic toggle="yes">Am. Stat.</italic>
</source>
                    <year>2018</year>;<volume>72</volume>:<fpage>28</fpage>&#x2013;<lpage>36</lpage>.
                    <pub-id pub-id-type="doi">10.1080/00031305.2017.1375990</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref21">
                <label>21</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Wickham</surname>
                            <given-names>H</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Chang</surname>
                            <given-names>W</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Wickham</surname>
                            <given-names>MH</given-names>
                        </name>
</person-group>:
                    <article-title>Package &#x201c;ggplot2&#x201d;: Create elegant data visualisations using the grammar of graphics.</article-title>
                    <year>2016</year>;<volume>2</volume>:<fpage>1</fpage>&#x2013;<lpage>189</lpage>.</mixed-citation>
            </ref>
            <ref id="ref22">
                <label>22</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Matthews</surname>
                            <given-names>BJ</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Dudchenko</surname>
                            <given-names>O</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Kingan</surname>
                            <given-names>SB</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Improved reference genome of Aedes aegypti informs arbovirus vector control.</article-title>
                    <source>

                        <italic toggle="yes">Nature.</italic>
</source>
                    <year>2018</year>;<volume>563</volume>:<fpage>501</fpage>&#x2013;<lpage>507</lpage>.
                    <pub-id pub-id-type="pmid">30429615</pub-id>
                    <pub-id pub-id-type="doi">10.1038/s41586-018-0692-z</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref23">
                <label>23</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Horvath</surname>
                            <given-names>R</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Minadakis</surname>
                            <given-names>N</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Bourgeois</surname>
                            <given-names>Y</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>The evolution of transposable elements in Brachypodium distachyon is governed by purifying selection, while neutral and adaptive processes play a minor role.</article-title>
                    <source>

                        <italic toggle="yes">eLife.</italic>
</source>
                    <year>2024</year>;<volume>12</volume>:<elocation-id>RP93284</elocation-id>.
                    <pub-id pub-id-type="pmid">38606833</pub-id>
                    <pub-id pub-id-type="doi">10.7554/eLife.93284</pub-id>
                    <pub-id pub-id-type="pmcid">PMC11014726</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref24">
                <label>24</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Rouxel</surname>
                            <given-names>T</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Grandaubert</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Hane</surname>
                            <given-names>JK</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Effector diversification within compartments of the Leptosphaeria maculans genome affected by Repeat-Induced Point mutations.</article-title>
                    <source>

                        <italic toggle="yes">Nat. Commun.</italic>
</source>
                    <year>2011</year>;<volume>2</volume>:<fpage>202</fpage>.</mixed-citation>
            </ref>
            <ref id="ref25">
                <label>25</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Press</surname>
                            <given-names>MO</given-names>
                        </name>

                        <name name-style="western">
                            <surname>McCoy</surname>
                            <given-names>RC</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Hall</surname>
                            <given-names>AN</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Massive variation of short tandem repeats with functional consequences across strains of Arabidopsis thaliana.</article-title>
                    <source>

                        <italic toggle="yes">Genome Res.</italic>
</source>
                    <year>2018</year>;<volume>28</volume>:<fpage>1169</fpage>&#x2013;<lpage>1178</lpage>.
                    <pub-id pub-id-type="pmid">29970452</pub-id>
                    <pub-id pub-id-type="doi">10.1101/gr.231753.117</pub-id>
                    <pub-id pub-id-type="pmcid">PMC6071631</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref26">
                <label>26</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Copeland</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Blackmon</surname>
                            <given-names>H</given-names>
                        </name>
</person-group>:
                    <article-title>coleoguy/DRproj: Initial release.</article-title>
                    <source>

                        <italic toggle="yes">Zenodo.</italic>
</source>
                    <year>2025</year>.
                    <pub-id pub-id-type="doi">10.5281/ZENODO.17073598</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref27">
                <label>27</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Copeland</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Blackmon</surname>
                            <given-names>H</given-names>
                        </name>
</person-group>:
                    <article-title>coleoguy/DirectRepeateR: DirectRepeateR - Initial Release.</article-title>
                    <source>

                        <italic toggle="yes">Zenodo.</italic>
</source>
                    <year>2025</year>.
                    <pub-id pub-id-type="doi">10.5281/ZENODO.16275409</pub-id>
                </mixed-citation>
            </ref>
        </ref-list>
    </back>
</article>
