<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.2 20190208//EN" "http://jats.nlm.nih.gov/publishing/1.2/JATS-journalpublishing1.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="other" dtd-version="1.2" xml:lang="en">
    <front>
        <journal-meta>
            <journal-id journal-id-type="pmc">F1000Research</journal-id>
            <journal-title-group>
                <journal-title>F1000Research</journal-title>
            </journal-title-group>
            <issn pub-type="epub">2046-1402</issn>
            <publisher>
                <publisher-name>F1000 Research Limited</publisher-name>
                <publisher-loc>London, UK</publisher-loc>
            </publisher>
        </journal-meta>
        <article-meta>
            <article-id pub-id-type="doi">10.12688/f1000research.18952.1</article-id>
            <article-categories>
                <subj-group subj-group-type="heading">
                    <subject>Software Tool Article</subject>
                </subj-group>
                <subj-group>
                    <subject>Articles</subject>
                </subj-group>
            </article-categories>
            <title-group>
                <article-title>Detection and mitigation of spurious antisense expression with RoSA</article-title>
                <fn-group content-type="pub-status">
                    <fn>
                        <p>[version 1; peer review: 2 approved with reservations]</p>
                    </fn>
                </fn-group>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Mour&#x00e3;o</surname>
                        <given-names>Kira</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Conceptualization</role>
                    <role content-type="http://credit.niso.org/">Data Curation</role>
                    <role content-type="http://credit.niso.org/">Formal Analysis</role>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <role content-type="http://credit.niso.org/">Software</role>
                    <role content-type="http://credit.niso.org/">Validation</role>
                    <role content-type="http://credit.niso.org/">Visualization</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Original Draft Preparation</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a1">1</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Schurch</surname>
                        <given-names>Nicholas J.</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Conceptualization</role>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <role content-type="http://credit.niso.org/">Project Administration</role>
                    <role content-type="http://credit.niso.org/">Software</role>
                    <role content-type="http://credit.niso.org/">Supervision</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a2">2</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Lucoszek</surname>
                        <given-names>Radek</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Data Curation</role>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a3">3</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Froussios</surname>
                        <given-names>Kimon</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Conceptualization</role>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <role content-type="http://credit.niso.org/">Software</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a4">4</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>MacKinnon</surname>
                        <given-names>Katarzyna</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Data Curation</role>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a5">5</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Duc</surname>
                        <given-names>C&#x00e9;line</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Data Curation</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a6">6</xref>
                </contrib>
                <contrib contrib-type="author" corresp="yes">
                    <name>
                        <surname>Simpson</surname>
                        <given-names>Gordon</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Funding Acquisition</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="corresp" rid="c1">a</xref>
                    <xref ref-type="aff" rid="a3">3</xref>
                </contrib>
                <contrib contrib-type="author" corresp="yes">
                    <name>
                        <surname>Barton</surname>
                        <given-names>Geoffrey J.</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Funding Acquisition</role>
                    <role content-type="http://credit.niso.org/">Project Administration</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Supervision</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <uri content-type="orcid">https://orcid.org/0000-0002-9014-5355</uri>
                    <xref ref-type="corresp" rid="c2">b</xref>
                    <xref ref-type="aff" rid="a7">7</xref>
                </contrib>
                <aff id="a1">
                    <label>1</label>Synpromics Ltd, Edinburgh, Midlothian, EH25 9RG, UK</aff>
                <aff id="a2">
                    <label>2</label>Biomathematics and Statistics Scotland, James Hutton Institute, Aberdeen, Scotland, AB15 8QH, UK</aff>
                <aff id="a3">
                    <label>3</label>Centre for Gene Regulation &amp; Expression, School of Life Sciences, University of Dundee, Dundee, Scotland, DD1 5EH, UK</aff>
                <aff id="a4">
                    <label>4</label>Research Institute of Molecular Pathology, Vienna, 1030, Austria</aff>
                <aff id="a5">
                    <label>5</label>Cell &amp; Molecular Sciences, James Hutton Institute, Invergowrie, Dundee, Scotland, DD2 5DA, UK</aff>
                <aff id="a6">
                    <label>6</label>&#x00c9;quipe &#x00c9;pig&#x00e9;n&#x00e9;tique, Unit&#x00e9; Fonctionnalit&#x00e9; et Ing&#x00e9;nierie des Prot&#x00e9;ines (UFIP) Facult&#x00e9; des Sciences et Techniques, Universit&#x00e9; de Nantes, NANTES, 92208 F44322 CEDEX 3, France</aff>
                <aff id="a7">
                    <label>7</label>Computational Biology, School of Life Sciences, University of Dundee, Dundee, Scotland, DD1 5EH, UK</aff>
            </contrib-group>
            <author-notes>
                <corresp id="c1">
                    <label>a</label>
                    <email xlink:href="mailto:G.G.Simpson@dundee.ac.uk">G.G.Simpson@dundee.ac.uk</email>
                </corresp>
                <corresp id="c2">
                    <label>b</label>
                    <email xlink:href="mailto:g.j.barton@dundee.ac.uk">g.j.barton@dundee.ac.uk</email>
                </corresp>
                <fn>
                    <p id="ffn1">*To whom correspondence should be addressed</p>
                </fn>
                <fn fn-type="conflict">
                    <p>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>7</day>
                <month>6</month>
                <year>2019</year>
            </pub-date>
            <pub-date pub-type="collection">
                <year>2019</year>
            </pub-date>
            <volume>8</volume>
            <elocation-id>819</elocation-id>
            <history>
                <date date-type="accepted">
                    <day>30</day>
                    <month>5</month>
                    <year>2019</year>
                </date>
            </history>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2019 Mour&#x00e3;o K et al.</copyright-statement>
                <copyright-year>2019</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access article distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <self-uri content-type="pdf" xlink:href="https://f1000research.com/articles/8-819/pdf"/>
            <abstract>
                <p>Antisense transcription is known to have a range of impacts on sense gene expression, including (but not limited to) impeding transcription initiation, disrupting post-transcriptional processes, and enhancing, slowing, or even preventing transcription of the sense gene. Strand-specific RNA-Seq protocols preserve the strand information of the original RNA in the data, and so can be used to identify where antisense transcription may be implicated in regulating gene expression. However, our analysis of 199 strand-specific RNA-Seq experiments reveals that spurious antisense reads are often present in these datasets at levels greater than 1% of sense gene expression levels. Furthermore, these levels can vary substantially even between replicates in the same experiment, potentially disrupting any downstream analysis, if the incorrectly assigned antisense counts dominate the set of genes with high antisense transcription levels. Currently, no tools exist to detect or correct for this spurious antisense signal. Our tool, RoSA (Removal of Spurious Antisense), detects the presence of high levels of spurious antisense read alignments in strand-specific RNA-Seq datasets. It uses incorrectly spliced reads on the antisense strand and/or ERCC spike-ins (if present in the data) to calculate both global and gene-specific antisense correction factors. We demonstrate the utility of our tool to filter out spurious antisense transcript counts in an 
                    <italic toggle="yes">Arabidopsis thaliana</italic> RNA-Seq experiment.</p>
                <p>
                    <bold>Availability:</bold> RoSA is open source software available under the GPL licence via the Barton Group GitHub page 
                    <ext-link ext-link-type="uri" xlink:href="https://github.com/bartongroup">https://github.com/bartongroup</ext-link>.</p>
            </abstract>
            <kwd-group kwd-group-type="author">
                <kwd>RNA-seq</kwd>
                <kwd>antisense expression</kwd>
                <kwd>gene expression</kwd>
                <kwd>Arabidopsis thaliana</kwd>
                <kwd>ENCODE</kwd>
            </kwd-group>
            <funding-group>
                <award-group id="fund-1" xlink:href="http://dx.doi.org/10.13039/501100000268">
                    <funding-source>Biotechnology and Biological Sciences Research Council</funding-source>
                    <award-id>BB/M004155/1</award-id>
                    <award-id>BB/M010066/1</award-id>
                </award-group>
                <funding-statement>This work has been supported by the Biotechnology and Biological Sciences Research Council [BB/M004155/1, BB/M010066/1] to G.J.B. and G.G.S.</funding-statement>
                <funding-statement>
                    <italic>The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript.</italic>
                </funding-statement>
            </funding-group>
        </article-meta>
    </front>
    <body>
        <sec sec-type="intro">
            <title>1. Introduction</title>
            <p>Antisense RNAs are transcribed from the strand opposite to that of the sense transcript of either protein-coding or non-protein-coding genes. They appear to be widespread in all kingdoms of life and can play distinct roles in regulating gene expression or function. Typically, antisense RNAs are non-coding and expressed at lower levels than sense gene transcripts. However, they can exhibit a range of sizes, and may or may not have 5&#x2019; caps or 3&#x2019; poly(A) tails depending on whether they arise from their own promoters, from divergent promoters, or from copying of sense transcripts by RNA-dependent RNA polymerases (see 
                <xref ref-type="bibr" rid="ref-1">1</xref> and references therein,
                <sup>
                    <xref ref-type="bibr" rid="ref-2">2</xref>&#x2013;
                    <xref ref-type="bibr" rid="ref-4">4</xref>
                </sup>). In 
                <italic toggle="yes">Arabidopsis thaliana</italic>, for example, the transcription of the Flowering Locus C (FLC) gene is known to be affected by transcription of antisense ncRNAs: COOLAIR
                <sup>
                    <xref ref-type="bibr" rid="ref-5">5</xref>,
                    <xref ref-type="bibr" rid="ref-6">6</xref>
                </sup>, a set of ncRNAs antisense to FLC, and COLDAIR
                <sup>
                    <xref ref-type="bibr" rid="ref-7">7</xref>
                </sup>, antisense to COOLAIR. Both COLDAIR and COOLAIR are associated with different changes in sense strand gene expression at the FLC locus
                <sup>
                    <xref ref-type="bibr" rid="ref-8">8</xref>
                </sup>. Antisense transcription is known to affect sense gene expression through multiple mechanisms
                <sup>
                    <xref ref-type="bibr" rid="ref-1">1</xref>
                </sup>. During transcription, RNA polymerases may physically interfere with each other if both sense and antisense transcription take place simultaneously. Interference can prevent or slow down transcription (e.g. through RNA polymerase collisions
                <sup>
                    <xref ref-type="bibr" rid="ref-9">9</xref>,
                    <xref ref-type="bibr" rid="ref-10">10</xref>
                </sup>) or force particular isoforms to be produced preferentially
                <sup>
                    <xref ref-type="bibr" rid="ref-11">11</xref>
                </sup>. Post-transcriptionally, antisense transcripts can compete with sense transcripts for binding sites
                <sup>
                    <xref ref-type="bibr" rid="ref-12">12</xref>
                </sup>. For example, the transcription of the human haemoglobin gene HBA1 is affected when the LUC7L gene on the opposite strand does not terminate, due to a deletion. It produces an antisense transcript that overlaps with HBA1, and which methylates the HBA1 promoter, repressing its expression
                <sup>
                    <xref ref-type="bibr" rid="ref-13">13</xref>
                </sup>. In addition, since regions of protein coding genes on opposite DNA strands can overlap, their expression effectively generates transcripts that are, to varying extents, antisense to each other. Such overlapping gene pairs are a common feature of genome organization. We and others have shown that in some eukaryotic genomes tail-tail overlap enables the use of pre-mRNA 3&#x2019; processing signals in different registers for genes coded on either strand
                <sup>
                    <xref ref-type="bibr" rid="ref-14">14</xref>
                </sup>. </p>
            <p>Incorporating antisense RNAs into genome annotation and properly quantifying their expression patterns is thus crucial, but remains challenging. Transcriptome-wide identification of RNAs is currently dominated by RNA-Seq. In this widespread experimental approach, RNA is rarely sequenced directly, but instead is fragmented and first copied into cDNA and then copied again, so that libraries of DNA are sequenced. However, the copying of RNA by viral-derived reverse transcriptases is problematic. First, these polymerases exhibit DNA-dependent polymerase activity, which can result in copies of the cDNA that can be incorrectly interpreted as antisense transcription. Second, just as reverse transcriptases switch template strand in viral biology, they can similarly switch templates in RNA-Seq library preparation, resulting, again, in the interpretation of non-authentic antisense RNAs
                <sup>
                    <xref ref-type="bibr" rid="ref-15">15</xref>&#x2013;
                    <xref ref-type="bibr" rid="ref-21">21</xref>
                </sup>. Historically, in microarray and RT-PCR experiments, this step is known to assign some transcripts to the wrong strand, creating spurious antisense transcripts. Preparing samples with actinomycin D can help to reduce the number of spurious antisense transcripts
                <sup>
                    <xref ref-type="bibr" rid="ref-17">17</xref>
                </sup> but can have unwanted side effects
                <sup>
                    <xref ref-type="bibr" rid="ref-20">20</xref>
                </sup>. Alternative approaches to make strand-specific RNA-Seq libraries have been developed to mitigate artefacts arising from reverse transcription; however, most of these also use reverse transcription
                <sup>
                    <xref ref-type="bibr" rid="ref-22">22</xref>
                </sup> and so have similar problems with incorrect assignment. For example, the highly-rated
                <sup>
                    <xref ref-type="bibr" rid="ref-22">22</xref>,
                    <xref ref-type="bibr" rid="ref-23">23</xref>
                </sup> and widely used dUTP protocol for stranded RNA-Seq
                <sup>
                    <xref ref-type="bibr" rid="ref-24">24</xref>
                </sup> is known to generate low levels of spurious antisense reads ranging from 0.6&#x2013;3% of the sense signal
                <sup>
                    <xref ref-type="bibr" rid="ref-22">22</xref>,
                    <xref ref-type="bibr" rid="ref-25">25</xref>,
                    <xref ref-type="bibr" rid="ref-26">26</xref>
                </sup>. Ultimately, the direct sequencing of full-length RNA molecules
                <sup>
                    <xref ref-type="bibr" rid="ref-27">27</xref>
                </sup> will overcome many of the problems of distinguishing authentic antisense RNAs. However, currently, reverse-transcriptase based approaches dominate and the extent of spurious antisense RNAs identified in RNA-Seq datasets is rarely exposed.</p>
            <p>In this paper, we analyse spurious antisense reads in 199 RNA-Seq experiments, across multiple organisms from both ENCODE
                <sup>
                    <xref ref-type="bibr" rid="ref-28">28</xref>
                </sup> and our own work. Our results show that spurious antisense reads are often present in experiments, and can manifest at levels greater than 1% of sense transcript levels. Furthermore, the number of spurious antisense reads can vary substantially between replicates within the same experiment. In some cases, this variation may be sufficient to disrupt downstream analysis of antisense gene expression, by causing spurious antisense counts to dominate the set of genes with high antisense transcription levels.</p>
            <p>To detect and correct for wrongly assigned reads we developed a tool, RoSA (Removal of Spurious Antisense), which calculates an antisense correction factor by identifying subsets of reads where all antisense reads are spurious. We evaluate the effect of using RoSA on 
                <italic toggle="yes">Arabidopsis thaliana</italic> experimental data where varying levels of spurious antisense were present in different replicates. RoSA reduces the overall dependence of antisense counts on sense counts, a key indicator of the presence of spurious antisense. For individual genes with different real and spurious antisense characteristics, RoSA reduces spurious antisense counts while retaining the antisense signal.</p>
        </sec>
        <sec sec-type="methods">
            <title>2. Methods</title>
            <p>As noted by Jiang 
                <italic toggle="yes">et al.</italic> (2011,
                <sup>
                    <xref ref-type="bibr" rid="ref-25">25</xref>
                </sup>), spurious antisense read counts can be estimated by analysing either ERCC spike-in data or counts of sense and antisense reads around splice sites. Each approach has different advantages: using spike-ins is simpler and faster, while using spliced reads allows a gene-by-gene estimate to be made. RoSA implements both approaches, in conjunction with pre-processing scripts to generate specialised read counts required by the tool. Once RoSA has an estimate of the levels of spurious antisense, it can adjust the raw antisense counts to account for the incorrectly stranded reads.</p>
            <sec>
                <title>2.1 RoSA: Removal of Spurious Antisense</title>
                <p>Our scripts and analysis code are bundled as a tool, RoSA (Removal of Spurious Antisense), available from the Barton Group&#x2019;s GitHub pages at 
                    <ext-link ext-link-type="uri" xlink:href="https://github.com/bartongroup/RoSA">https://github.com/bartongroup/RoSA</ext-link>. RoSA is an R package supported by two Python pre-processing scripts, callable from R.</p>
                <p>For genes with spliced transcripts which are expressed in the data, RoSA uses the subset of reads from either strand that map across the splice junctions. The antisense reads in this subset are almost certainly spurious, and so RoSA can use the read counts to calculate a gene-specific antisense correction factor (Section 2.2). For genes without spliced transcripts, RoSA uses ERCC spike-in data, if present. Here any antisense read mappings are, by definition, spurious and the ratio of sense to antisense reads mapping to the spike-ins thus provides a global, rather than gene-specific, antisense correction factor (Section 2.3). If ERCC spike-in data is not available, RoSA instead calculates a global estimate of the spurious antisense fraction from the set of spliced reads. Counting all, or spliced-only, antisense reads is not directly supported by existing tools. RoSA&#x2019;s pre-processing scripts perform these functions. The 
                    <italic toggle="yes">make_annotation</italic> script creates an antisense annotation (as gtf) from a standard annotation (as gff or gtf), which can then be used to generate antisense read counts via a standard read counting tool (Section 2.4.1). RoSA does not specify how the sense and antisense gene expression is counted, leaving users free to apply whichever tool they feel will best represent the gene expression in their experiment. However, the accuracy of the corrections calculated by RoSA will be affected by this choice in the same way as the calculation of differential gene expression. If counting methods are used that only consider regions within sense features that do not overlap any antisense feature, the gene-specific corrections calculated by RoSA may be less accurate where the overlap is large and/or the sense or antisense expression is low.</p>
                <p>RoSA then adjusts these raw counts to produce corrected antisense counts (Section 2.4). The 
                    <italic toggle="yes">count_spliced</italic> script generates sense and antisense counts of reads at splice junctions, used when estimating spurious antisense from spliced reads. The script takes a standard annotation (as gtf/gff) and corresponding alignment (as bam) and outputs counts of spliced sense and antisense reads to a designated output file.</p>
                <p>RoSA takes several datasets containing different read counts as its input, for each replicate:</p>
                <list list-type="bullet">
                    <list-item>
                        <label>1. </label>
                        <p>Full read counts by gene</p>
                    </list-item>
                    <list-item>
                        <label>2. </label>
                        <p>Antisense counts by gene (via the 
                            <italic toggle="yes">make_annotation</italic> script)</p>
                    </list-item>
                    <list-item>
                        <label>3. </label>
                        <p>At least one of:</p>
                        <list list-type="bullet">
                            <list-item>
                                <label>a. </label>
                                <p>Spike-in sense and antisense counts</p>
                            </list-item>
                            <list-item>
                                <label>b. </label>
                                <p>Spliced sense and antisense counts (via RoSA's 
                                    <italic toggle="yes">count_spliced</italic> script)</p>
                            </list-item>
                        </list>
                    </list-item>
                </list>
                <p>RoSA calculates and returns antisense:sense ratios for the spike-in data, or spliced read data, or both, and, for each gene and replicate, outputs new read counts values corrected for spurious antisense. RoSA also plots antisense versus sense counts of the original and corrected data, by replicate.</p>
            </sec>
            <sec>
                <title>2.2 Using spliced reads</title>
                <p>RoSA&#x2019;s main approach to estimating spurious antisense is to use spliced reads within the main dataset. Reads that map antisense to a multi-exon gene and that also show the same splicing pattern as spliced sense-mapping reads are almost certainly spurious, as the splicing motif (canonically GU-AG) will be incorrect on the opposite strand (
                    <xref ref-type="fig" rid="f1">Figure 1</xref>). An estimate of spurious antisense can be calculated by considering only spliced reads whose splices match annotated splice sites (
                    <italic toggle="yes">splice-matched reads</italic>), and, as with the spike-ins, calculating the ratio of antisense to sense reads.</p>
                <fig fig-type="figure" id="f1" orientation="portrait" position="float">
                    <label>Figure 1. </label>
                    <caption>
                        <title>An example of spurious antisense reads displaying the same splicing structure as the sense strand.</title>
                        <p>The reverse strand gene AT4G18970 is strongly expressed in all 3 replicates (bottom tracks). Spurious antisense can also be seen in all replicates (top tracks), with splice points in the antisense signal matching splice points in the sense signal. Furthermore, the level of spurious antisense varies noticeably between replicates. (Figure generated by IGB
                            <sup>
                                <xref ref-type="bibr" rid="ref-32">32</xref>
                            </sup>).</p>
                    </caption>
                    <graphic orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/20776/7407bf18-210b-4de8-aba9-90a17a3786cb_figure1.gif"/>
                </fig>
                <p>Splice-matched reads are identified by first filtering all spliced reads in the data. In a bam file of aligned reads, spliced reads have a CIGAR string containing &#x2018;N&#x2019;, indicating a skipped region. SAM processing tools such as sambamba
                    <sup>
                        <xref ref-type="bibr" rid="ref-29">29</xref>
                    </sup> support filtering on the CIGAR string and can extract spliced reads rapidly. A second filtering step pulls out only those reads whose splice locations match at least one intron in the annotation, by processing each read in turn, identifying the spliced positions (based on the read location and the CIGAR string) and checking the annotation for a matching intron. Finally, the strand of each spliced read can be determined from its flag field value
                    <sup>
                        <xref ref-type="bibr" rid="ref-30">30</xref>
                    </sup>, and compared to the strand of the matching intron(s). Reads on the same strand as the intron(s) are counted as sense reads, and remaining reads as antisense reads. Since spurious antisense reads are misallocated sense reads, the number of antisense splice-matched reads assigned to a gene is strongly positively correlated with the number of its sense splice-matched reads (see Section 3). The ratio of antisense:sense counts on the splice-matched reads thus gives a simple global estimate of the level of spurious antisense across the whole dataset. Using spliced reads has the advantage that an antisense:sense ratio can be calculated on a gene-by-gene basis, for any spliced gene. Genes without any spliced reads fall back on the global estimate, calculated either from the spike-ins (see Section 2.3) or the spliced reads.</p>
                <p>In the case of real, unannotated, antisense expression at a gene locus, the behaviour of RoSA falls into three categories:</p>
                <list list-type="bullet">
                    <list-item>
                        <label>1. </label>
                        <p>If the splicing of the true antisense transcript differs from that of the sense transcript (including no splicing), then RoSA&#x2019;s gene-specific correction will remove any spurious antisense expression (identified by antisense matches to the sense splicing) and leave the true antisense expression unchanged.</p>
                    </list-item>
                    <list-item>
                        <label>2. </label>
                        <p>If the splicing of the antisense expression is the same as the sense strand, then RoSA will remove this completely.</p>
                    </list-item>
                    <list-item>
                        <label>3. </label>
                        <p>If the true antisense splicing is the same as the sense strand in some parts of the transcript, but not across the entire transcript, then RoSA will remove a fraction of the true antisense expression depending on how similar the splicing patterns are.</p>
                    </list-item>
                </list>
                <p>We anticipate that occurrences of 2 &amp; 3 will be uncommon in RNA-seq datasets. Point 2 highlights a minor potential limitation of the gene-specific splicing-based corrections calculated by RoSA, namely that it cannot distinguish between spurious antisense signal and potential biological transcription from RNA-dependent RNA polymerases (RdRPs). Although RdRPs are widespread in eukaryotic genomes, only 8&#x2013;30% of eukaryotic gene regions have significant length ORFs on their opposite strands
                    <sup>
                        <xref ref-type="bibr" rid="ref-31">31</xref>
                    </sup>, providing an upper limit on the potential impact of this method of transcription on the RNA complement within a cell. Eukaryotic RdRPs evolved independently from their viral counterparts and, in plants, are involved in siRNA transcriptional silencing
                    <sup>
                        <xref ref-type="bibr" rid="ref-33">33</xref>
                    </sup>. This is not the case in animals however (except in 
                    <italic toggle="yes">C. elegans</italic>) where their function remains elusive
                    <sup>
                        <xref ref-type="bibr" rid="ref-34">34</xref>
                    </sup>.</p>
            </sec>
            <sec>
                <title>2.3 Using ERCC spike-ins</title>
                <p>An alternative approach to estimating spurious antisense is to use ERCC spike-in data. Developed by the External RNA Control Consortium (ERCC)
                    <sup>
                        <xref ref-type="bibr" rid="ref-35">35</xref>
                    </sup>, the ERCC spike-in controls are synthetic RNA transcripts that are added to RNA-Seq experiments to act as controls
                    <sup>
                        <xref ref-type="bibr" rid="ref-36">36</xref>
                    </sup>. The 92 spike-ins are designed to mimic a range of eukaryotic mRNA characteristics, varying in length, GC-content and concentration, with a 20bp poly-A tail. They have minimal sequence similarity with known eukaryotic transcripts. Since the spike-ins are synthetic, they are unidirectional, and so any reads assigned as antisense to a spike-in can be assumed to be spurious. As the spike-ins are present at a wide range of concentrations, they are detected with a wide range of read counts, permitting an estimate of the ratio of antisense to sense read counts on the spike-ins to be calculated, which can then be used to estimate the contribution of spurious antisense transcripts across the full dataset. Obtaining sense and antisense counts for the spike-ins is straightforward. First we align the reads to the spike-ins (using the spike-in annotation file ERCC92.gtf, available at 
                    <ext-link ext-link-type="uri" xlink:href="https://www.thermofisher.com/order/catalog/product/4456739">https://www.thermofisher.com/order/catalog/product/4456739</ext-link>) and then count reads, using a strand-aware read counting tool such as featureCounts
                    <sup>
                        <xref ref-type="bibr" rid="ref-37">37</xref>
                    </sup>, HTSeq-count
                    <sup>
                        <xref ref-type="bibr" rid="ref-38">38</xref>
                    </sup>, etc. Now averaging the spurious antisense:sense ratio across all of the spike-ins gives a global estimate of the spurious antisense, in just the same way as for the spliced reads.</p>
            </sec>
            <sec>
                <title>2.4 Mitigating spurious antisense</title>
                <p>Having identified high or differing levels of spurious antisense in an RNA-Seq experiment, we also want to correct for the incorrectly assigned reads so that true differential expression calling can be performed. The ratio of spurious antisense:sense read counts can be used as a simple correction factor. Defining 
                    <italic toggle="yes">r</italic> as the ratio of spurious antisense:sense and 
                    <italic toggle="yes">S</italic> and 
                    <italic toggle="yes">A</italic> respectively as the number of sense and antisense counts for a gene, the number of spurious antisense read counts 
                    <italic toggle="yes">A
                        <sub>S</sub>
                    </italic> is estimated for each gene as: 
                    <italic toggle="yes">A
                        <sub>S</sub> = r &#x00d7; S.</italic>
                </p>
                <p>Then the antisense count can be corrected to account for the spurious antisense by taking 
                    <italic toggle="yes">A - A
                        <sub>S</sub>
                    </italic>. This correction simply adjusts read counts for each gene, and does not identify specific reads as incorrectly assigned, so pile-ups cannot be adjusted. Since the spurious antisense reads are mis-assigned sense reads, RoSA then adds the spurious antisense count for each gene to its sense count.</p>
                <p>
                    <bold>
                        <italic toggle="yes">2.4.1 Counting antisense reads.</italic>
                    </bold> In order to apply the antisense correction factor, counts of antisense reads for each gene are required. Counting antisense reads is not directly supported by read counting tools. However, it can be performed with featureCounts
                    <sup>
                        <xref ref-type="bibr" rid="ref-37">37</xref>
                    </sup> by setting parameters to indicate that reads are stranded in the opposite direction to which they are. Unfortunately, if there are overlapping genes then reads in the overlaps will be counted twice using this tactic. As reads in regions of gene overlap are necessarily ambiguous, they cannot be considered to be antisense, spurious or otherwise. RoSA avoids this issue by building a custom antisense annotation based on the input sense annotation but excluding regions where genes on opposite strands overlap. Different gene transcripts are accounted for by merging all transcripts for a gene into a single 
                    <italic toggle="yes">maximal transcript</italic>. Whenever exons of different transcripts overlap in the annotation, the exon in the maximal transcript is the maximum extent of both exons. Given a maximal transcript, the script creates an antisense feature on the opposite strand which runs for the full extent of the maximal transcript. If the maximal transcript of another gene overlaps with the antisense feature, then the antisense feature is truncated to avoid overlapping. Once an antisense annotation is available, a read counting tool can be used to count antisense reads, by providing the antisense annotation instead of the standard annotation.</p>
            </sec>
            <sec>
                <title>2.5 
                    <italic toggle="yes">Arabidopsis thaliana</italic> datasets with spurious antisense</title>
                <p>A procedure to experimentally generate RNA-Seq data with specific levels of spurious antisense is not known. Our main experimental data (Experiment 1) is therefore drawn from the study which originally motivated our investigation into incorrectly assigned antisense reads. In this study, spurious antisense occurred by chance at varying orders of magnitude across different replicates. Additionally, we perform a meta-analysis using three other 
                    <italic toggle="yes">Arabidopsis thaliana</italic> datasets (Experiments 2&#x2013;4,
                    <sup>
                        <xref ref-type="bibr" rid="ref-39">39</xref>
                    </sup>) and data from ENCODE (see 
                    <italic toggle="yes">Underlying data</italic> for the full list of the ENA and ENCODE accessions).</p>
                <p>
                    <bold>
                        <italic toggle="yes">2.5.1 Arabidopsis sample preparation and sequencing.</italic>
                    </bold> Briefly, the RNA-Seq data for Experiment 1 is wild-type (WT) 
                    <italic toggle="yes">Arabidopsis thaliana</italic> Columbia-0 (Col-0) biological replicates. WT 
                    <italic toggle="yes">A. thaliana</italic> Col-0 seeds were sown aseptically on MS10 plates. The seeds were stratified for 2 days at 4&#x00b0;C and then grown at a constant 21&#x00b0;C under a 16-h light/8-h dark cycle for a further 14 days, at the end of which the seedlings were harvested. Total RNA was isolated from the seedlings with the RNeasy Plant Mini Kit (Qiagen). In Experiment 1, DNAse digestion was performed on column, as a part of RNA isolation, and 8 &#x03bc;l of ERCC spike-ins (External RNA Controls Consortium 2005) at a 1:100 dilution was added to 4 &#x03bc;g of total RNA. Libraries were prepared according to the TruSeq
                    <sup>&#x00ae;</sup> Stranded mRNA Sample Preparation Guide Rev E. The libraries were sequenced on a HiSeq2000 at the Genomic Sequencing Unit of the University of Dundee. This preparation largely mirrors the sample preparation of the datasets taken from Froussios 
                    <italic toggle="yes">et al.</italic> (2017,
                    <sup>
                        <xref ref-type="bibr" rid="ref-39">39</xref>
                    </sup>, Experiments 2&#x2013;4). In Experiments 2&#x2013;4, however, the sequencing libraries were prepared using the Illumina TruSeq
                    <sup>&#x00ae;</sup> Stranded Total RNA with Ribo-Zero Plant kit.</p>
                <p>Experiment 1 has 3 replicates, processed as one batch, with a total of 4 &#x00d7; 10
                    <sup>8</sup> 150-bp paired-end reads. Experiments 2 and 3 have 7 biological WT replicates, while Experiment 4 has 3, for a total of 17 biological WT replicates and ~1.7 &#x00d7; 10
                    <sup>9</sup> 100-bp paired-end reads across the three experiments. The same lab sowed, grew and harvested the plants, and prepared the libraries. The sequencing was performed on the same machine by the same people at the same sequencing facility and all the samples include the ERCC spike-ins which can verify the WT samples are consistent and comparable across experiments.</p>
                <p>
                    <bold>
                        <italic toggle="yes">2.5.2 Quality control, alignment and quantification.</italic>
                    </bold> The quality of the data was quantified using FastQC v0.11.2
                    <sup>
                        <xref ref-type="bibr" rid="ref-40">40</xref>
                    </sup> with all the replicates performing as expected for high quality RNA-Seq data with good median per-base quality (&#x2265;28) across &gt;90% of the read length. The read data for all experiments were aligned to the TAIR10
                    <sup>
                        <xref ref-type="bibr" rid="ref-41">41</xref>
                    </sup> 
                    <italic toggle="yes">Arabidopsis thaliana</italic> genome using the splice-aware aligner STAR v2.4.2a
                    <sup>
                        <xref ref-type="bibr" rid="ref-42">42</xref>
                    </sup> for Experiment 1 and STAR v2.5.0 for Experiments 2&#x2013;4. The index was built with --sjdbOverhang 149 (Experiment 1) or --sjdbOverhang 99 (Experiments 2&#x2013;4) and the alignment was run with parameters: --outSJfilterIntronMaxVsReadN 5000 10000 15000 --outSAMAttributes All --outFilterMultimapNmax 2 --outFilterMismatchNmax 5 --outFilterType BySJout.</p>
                <p>The read data were also aligned to the ERCC spike-ins annotation, using the same parameters. Read counts per gene were then quantified from these alignments with featureCounts v1.5.0-p1 using the publicly available TAIR10 annotation with the parameters: -s 2 -p -t exon --largestOverlap. After running RoSA&#x2019;s 
                    <italic toggle="yes">make_annotation</italic> script to build an antisense annotation, antisense read counts per gene were quantified in the same way, with the parameters: -s 2 -p -t antisense --largestOverlap. Finally, spliced sense and antisense reads were counted using RoSA&#x2019;s 
                    <italic toggle="yes">count_spliced</italic> script with the TAIR10 annotation.</p>
            </sec>
            <sec>
                <title>2.6 Operation</title>
                <p>A full description of RoSA&#x2019;s environment, dependencies, installation and basic operation can be found on the RoSA GitHub repository. Briefly, RoSA is a combination of an 
                    <italic toggle="yes">R</italic> package and python scripts for data preprocessing. Minimal system requirements for the package are 
                    <italic toggle="yes">R</italic> v3.5+, 
                    <italic toggle="yes">python 2</italic> v2.7+, the 
                    <italic toggle="yes">LSD R</italic> package and the python packages 
                    <italic toggle="yes">scipy</italic> (v0.16.1 - 0.17.1), 
                    <italic toggle="yes">numpy</italic>, 
                    <italic toggle="yes">pandas</italic> (not v0.20.1), 
                    <italic toggle="yes">six</italic> and, optionally, 
                    <italic toggle="yes">drmaa</italic> for cluster integration. The python scripts to find and count spliced antisense and sense reads also depend on 
                    <italic toggle="yes">
                        <ext-link ext-link-type="uri" xlink:href="http://lomereiter.github.io/sambamba/">sambamba</ext-link>
                    </italic>. To facilitate ease-of-use, a conda environment that captures all the relevant dependencies is included as part of the RoSA codebase. RoSA&#x2019;s python scripts are provided as a python package and are installed via pip, while the R package can be installed directly from within R using the devtools package.</p>
                <p>RoSA operates on the total and spliced read counts from sense and antisense bam format read alignments of stranded RNA-Seq datasets, either with or without ERCC spike-in standards. To facilitate easy generation of this read count data, RoSA includes helper pre-processing scripts to generate the antisense counterpart of the provided gtf/gff format sense-strand genome annotations (
                    <italic toggle="yes">make_annotation</italic>), and to generate spliced-read gene count data from the bam format read alignments using both the sense and antisense annotations (
                    <italic toggle="yes">count_spliced</italic>). Both of these helper scripts can be called directly within R as part of the RoSA R package. Detailed help for the R RoSA functionality can be accessed within R with the command, 
                    <italic toggle="yes">help(rosa)</italic>.</p>
            </sec>
        </sec>
        <sec sec-type="results">
            <title>3 Results</title>
            <p>We used RoSA to analyze our data from Experiment 1 for spurious antisense, using both the spike-in and spliced reads counts. RoSA calculated antisense:sense ratios for the spike-ins (
                <xref ref-type="fig" rid="f2">Figure 2</xref>) showing that the 3 replicates have antisense:sense ratios on the spike-ins of 0.0008, 0.004 and 0.011. Although these ratios are small, if the replicates were being compared for differential expression, the differences are potentially substantial for highly expressed genes, and could lead to differential antisense expression being called erroneously.</p>
            <fig fig-type="figure" id="f2" orientation="portrait" position="float">
                <label>Figure 2. </label>
                <caption>
                    <title>Antisense versus sense counts for the ERCC spike-ins for each replicate in Experiment 1.</title>
                    <p>Points represent antisense and sense read counts for individual spike-ins. Each line is the average antisense:sense ratio for one replicate. Here, antisense:sense ratios vary by an order of magnitude across the 3 replicates, with values of 0.004 (WT1), 0.0008 (WT2), and 0.011 (WT3).</p>
                </caption>
                <graphic orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/20776/7407bf18-210b-4de8-aba9-90a17a3786cb_figure2.gif"/>
            </fig>
            <p>For each replicate we calculated the spurious antisense:sense ratios for the spliced reads with RoSA, and compared them to the spike-ins. An overview of the results for all three replicates shows that the spurious antisense levels calculated from the spike-ins are in good agreement with the levels calculated from the spliced reads (
                <xref ref-type="fig" rid="f3">Figure 3</xref> and 
                <xref ref-type="fig" rid="f4">Figure 4</xref>, Row 1).</p>
            <fig fig-type="figure" id="f3" orientation="portrait" position="float">
                <label>Figure 3. </label>
                <caption>
                    <title>Comparison of antisense:sense ratios calculated from spliced reads or spike-ins, by replicate.</title>
                    <p>Ratios estimated from spike-ins show good agreement with ratios estimated from spliced reads. Outliers not shown.</p>
                </caption>
                <graphic orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/20776/7407bf18-210b-4de8-aba9-90a17a3786cb_figure3.gif"/>
            </fig>
            <fig fig-type="figure" id="f4" orientation="portrait" position="float">
                <label>Figure 4. </label>
                <caption>
                    <title>Normalised antisense versus sense counts by replicate.</title>
                    <p>Each column presents data for one replicate. Row 1: Antisense:sense ratios calculated from spike-ins (Black points &amp; fit line) and spliced reads for each gene (density heatmap). The antisense:sense ratios for both the spike-ins and spliced reads are in good agreement. The strong correlation between the sense and antisense spliced counts, and the constant antisense:sense ratio across all genes, indicates that the majority of the antisense expression in the data is not a sequence-, or gene-specific, phenomenon. Rather, this is what would be expected from a systematic process affecting a constant fraction of the sequenced reads. Row 2: Antisense:sense ratios calculated for the full gene counts (spliced &amp; unspliced). The correlation between the sense and antisense expression persists, however it is weaker than the correlation using just the spliced and spike-in sense and antisense expression. This reflects the inclusion of true biological antisense expression, unspliced genes where a global correction is less accurate, and low expression genes where the splicing correction is not well measured. Row 3: Corrected antisense:sense ratios calculated for the full gene counts (spliced &amp; unspliced). The corrected antisense counts show much weaker correlation with the corresponding gene counts reflecting the removal of the systematic spurious antisense count signal. On all plots the dashed line marks y=x; points above this line correspond to genes where the antisense:sense ratio is &gt; 1.</p>
                </caption>
                <graphic orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/20776/7407bf18-210b-4de8-aba9-90a17a3786cb_figure4.gif"/>
            </fig>
            <p>Finally, RoSA calculated a spurious antisense correction across the whole of each replicate. Every spliced gene was corrected with the antisense:sense ratio specific to the gene, and unspliced genes were corrected using the mean ratio calculated from the spike-ins. (RoSA also allows the unspliced correction to be calculated from the mean spliced reads ratio, for datasets without ERCC spike-ins). Overall, RoSA reduces the correlation between antisense and sense counts in the data (
                <xref ref-type="fig" rid="f4">Figure 4</xref>, Rows 2 &amp; 3), as would be expected with a reduction in incorrectly assigned reads. Two examples of corrections made by RoSA are shown in 
                <xref ref-type="fig" rid="f5">Figure 5</xref>. Where the antisense signal appears to be almost entirely spurious, RoSA&#x2019;s correction factor reduces the antisense counts substantially, but where there also appears to be some real antisense signal, RoSA&#x2019;s correction factor leaves a higher proportion of counts.</p>
            <fig fig-type="figure" id="f5" orientation="portrait" position="float">
                <label>Figure 5. </label>
                <caption>
                    <title>Two genes with differing antisense expression profiles and the read count corrections proposed by RoSA.</title>
                    <p>The reverse strand gene AT4G18970 (left) has antisense expression which clearly matches the splice sites of the sense strand. RoSA eliminates almost all of the antisense reads. The forward strand gene AT5G66570 (right) has both antisense expression matching the sense strand splice sites, and a peak at the 5&#x2019; end which is unlikely to have resulted from incorrect read assignment. RoSA only reduces the antisense counts by around 40%. (Figures generated by IGB
                        <sup>
                            <xref ref-type="bibr" rid="ref-32">32</xref>
                        </sup>).</p>
                </caption>
                <graphic orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/20776/7407bf18-210b-4de8-aba9-90a17a3786cb_figure5.gif"/>
            </fig>
            <p>As well as identifying instances of antisense expression, looking at antisense counts in this way can also be useful in identifying misannotated genes. For example, in our data there are many genes where the antisense:sense ratio is more than 1 (
                <italic toggle="yes">e.g.</italic> see points lying above 
                <italic toggle="yes">x=y</italic> in 
                <xref ref-type="fig" rid="f4">Figure 4</xref>, Row 2), which may indicate an incorrect strand assignment in the annotation.</p>
            <sec>
                <title>3.1 Comparing antisense:sense ratios</title>
                <p>Calculating antisense:sense ratios allows comparisons of spurious antisense to be made between replicates and between experimental conditions, and can reveal whether there are systematic differences which might confound experimental comparisons. For example, 
                    <xref ref-type="fig" rid="f1">Figure 1</xref> presents results from an RNA-Seq experiment where spurious antisense levels differed by an order of magnitude between replicates. In this experiment, the WT replicates had spurious antisense:sense ratios of 0.0031 (SD 0.0116), 0.0009 (SD 0.0070) and 0.0111 (SD 0.031).</p>
                <p>To determine the extent of this problem for RNA-Seq datasets in general, we investigated the spurious antisense levels across a range of experiments and research groups. We analysed antisense reads assigned to the spike-ins from three other experiments in our lab (Experiments 2&#x2013;4), as well as 195 publicly available human datasets from the ENCODE project that included the ERCC spike-ins
                    <sup>
                        <xref ref-type="bibr" rid="ref-28">28</xref>
                    </sup> (see 
                    <italic toggle="yes">Underlying data</italic> for details of the sense, antisense and RoSA-corrected antisense expression for all 
                    <italic toggle="yes">A. thaliana</italic> genes in the datasets from Experiments 1&#x2013;4). A separate antisense:sense ratio was calculated for each replicate in each experiment (
                    <xref ref-type="fig" rid="f6">Figure 6</xref>), showing that spurious antisense reads are present at varying levels and can range across several orders of magnitude. This presents a serious quality control issue for anyone investigating differential antisense expression: a real difference in antisense expression could be completely masked by a difference in spurious antisense.</p>
                <fig fig-type="figure" id="f6" orientation="portrait" position="float">
                    <label>Figure 6. </label>
                    <caption>
                        <title>Spurious antisense:sense ratios for spike-ins, by research group.</title>
                        <p>Data are either from ENCODE (Gingeras, Graveley and Lecuyer) or our own group (Simpson). Each point represents the ratio for a replicate. Ratios range from 0.0111 to 0.00003.</p>
                    </caption>
                    <graphic orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/20776/7407bf18-210b-4de8-aba9-90a17a3786cb_figure6.gif"/>
                </fig>
            </sec>
        </sec>
        <sec sec-type="conclusions">
            <title>4 Conclusions</title>
            <p>Spurious antisense is common in strand-specific RNA-Seq datasets and can occur at varying levels across replicates in the same experiment. Differing levels of such incorrectly assigned reads are enough to disrupt differential expression analyses of antisense gene expression.</p>
            <p>We have developed a new tool, RoSA, which can detect, quantify and correct for spurious antisense. RoSA provides an important quality control step for researchers analyzing antisense expression in their data.</p>
        </sec>
        <sec>
            <title>Data availability</title>
            <sec>
                <title>Underlying data</title>
                <p>Arabidopsis Col-0 WT strand-specific RNA-Seq data from poly-A pulldown, Accession number E-MTAB-7990: 
                    <ext-link ext-link-type="uri" xlink:href="https://identifiers.org/arrayexpress/E-MTAB-7990">https://identifiers.org/arrayexpress/E-MTAB-7990</ext-link>
                </p>
                <p>RNA-seq data of wild type Arabidopsis seedlings, Accession number E-MTAB-5446: 
                    <ext-link ext-link-type="uri" xlink:href="https://identifiers.org/arrayexpress/E-MTAB-5446">https://identifiers.org/arrayexpress/E-MTAB-5446</ext-link>
                </p>
            </sec>
            <sec>
                <title>Extended data</title>
                <p>Zenodo: bartongroup/RoSA: Initial, 
                    <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.5281/zenodo.2661378">http://doi.org/10.5281/zenodo.2661378</ext-link>
                    <sup>
                        <xref ref-type="bibr" rid="ref-43">43</xref>
                    </sup>.</p>
                <p>This project contains the following extended data:</p>
                <list list-type="bullet">
                    <list-item>
                        <label>- </label>
                        <p>Accession numbers for ENCODE data: 
                            <ext-link ext-link-type="uri" xlink:href="https://github.com/bartongroup/RoSA/tree/master/extras/F1000_manuscript/RoSA_Extended_Data.docx">https://github.com/bartongroup/RoSA/tree/master/extras/F1000_manuscript/RoSA_Extended_Data.docx</ext-link>
                        </p>
                    </list-item>
                    <list-item>
                        <label>- </label>
                        <p>Accession details for ENCODE data: 
                            <ext-link ext-link-type="uri" xlink:href="https://github.com/bartongroup/RoSA/blob/master/extras/F1000_manuscript/ENCODE_accessions.xlsx">https://github.com/bartongroup/RoSA/blob/master/extras/F1000_manuscript/ENCODE_accessions.xlsx</ext-link>
                        </p>
                    </list-item>
                    <list-item>
                        <label>- </label>
                        <p>Arabidopsis seedlings RNA-seq read count expression counts: 
                            <ext-link ext-link-type="uri" xlink:href="https://github.com/bartongroup/RoSA/tree/master/extras/F1000_manuscript/expression_data.csv">https://github.com/bartongroup/RoSA/tree/master/extras/F1000_manuscript/expression_data.csv</ext-link>
                        </p>
                    </list-item>
                </list>
                <p>License: GNU General Public License 3.0.</p>
            </sec>
        </sec>
        <sec>
            <title>Software availability</title>
            <p>Source code available from: 
                <ext-link ext-link-type="uri" xlink:href="https://github.com/bartongroup/RoSA">https://github.com/bartongroup/RoSA</ext-link>
            </p>
            <p>Archived source code as at time of publication: 
                <ext-link ext-link-type="uri" xlink:href="https://dx.doi.org/10.5281/zenodo.2661378">https://doi.org/10.5281/zenodo.2661378</ext-link>
                <sup>
                    <xref ref-type="bibr" rid="ref-43">43</xref>
                </sup>.</p>
            <p>License: GNU General Public License 3.0.</p>
        </sec>
    </body>
    <back>
        <ack>
            <title>Acknowledgments</title>
            <p>A previous version of this article is available on BioRxiv: 
                <ext-link ext-link-type="uri" xlink:href="https://dx.doi.org/10.1101/425900">https://doi.org/10.1101/425900</ext-link>.</p>
        </ack>
        <ref-list>
            <ref id="ref-1">
                <label>1</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Pelechano</surname>
                            <given-names>V</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Steinmetz</surname>
                            <given-names>LM</given-names>
                        </name>
					</person-group>:
                    <article-title>Gene regulation by antisense transcription.</article-title>
                    <source>
						
                        <italic toggle="yes">Nat Rev Genet.</italic>
					</source>
                    <year>2013</year>;<volume>14</volume>(<issue>12</issue>):<fpage>880</fpage>&#x2013;<lpage>893</lpage>.
                    <pub-id pub-id-type="pmid">24217315</pub-id>
                    <pub-id pub-id-type="doi">10.1038/nrg3594</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-2">
                <label>2</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Matsui</surname>
                            <given-names>A</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Iida</surname>
                            <given-names>K</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Tanaka</surname>
                            <given-names>M</given-names>
                        </name>
						
                        <etal/>
					</person-group>:
                    <article-title>Novel Stress-Inducible Antisense RNAs of Protein-Coding Loci Are Synthesized by RNA-Dependent RNA Polymerase.</article-title>
                    <source>
						
                        <italic toggle="yes">Plant Physiol.</italic>
					</source>
                    <year>2017</year>;<volume>175</volume>(<issue>1</issue>):<fpage>457</fpage>&#x2013;<lpage>472</lpage>.
                    <pub-id pub-id-type="pmid">28710133</pub-id>
                    <pub-id pub-id-type="doi">10.1104/pp.17.00787</pub-id>
                    <pub-id pub-id-type="pmcid">5580770</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-3">
                <label>3</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Lin</surname>
                            <given-names>S</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Zhang</surname>
                            <given-names>L</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Luo</surname>
                            <given-names>W</given-names>
                        </name>
						
                        <etal/>
					</person-group>:
                    <article-title>Characteristics of Antisense Transcript Promoters and the Regulation of Their Activity.</article-title>
                    <source>
						
                        <italic toggle="yes">Int J Mol Sci.</italic>
					</source>
                    <year>2015</year>;<volume>17</volume>(<issue>1</issue>): pii: <elocation-id>E9</elocation-id>.
                    <pub-id pub-id-type="pmid">26703594</pub-id>
                    <pub-id pub-id-type="doi">10.3390/ijms17010009</pub-id>
                    <pub-id pub-id-type="pmcid">4730256</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-4">
                <label>4</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Chan</surname>
                            <given-names>WY</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Wu</surname>
                            <given-names>SM</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Ruszczyk</surname>
                            <given-names>L</given-names>
                        </name>
						
                        <etal/>
					</person-group>:
                    <article-title>The complexity of antisense transcription revealed by the study of developing male germ cells.</article-title>
                    <source>
						
                        <italic toggle="yes">Genomics.</italic>
					</source>
                    <year>2006</year>;<volume>87</volume>(<issue>6</issue>):<fpage>681</fpage>&#x2013;<lpage>692</lpage>.
                    <pub-id pub-id-type="pmid">16458478</pub-id>
                    <pub-id pub-id-type="doi">10.1016/j.ygeno.2005.12.006</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-5">
                <label>5</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Swiezewski</surname>
                            <given-names>S</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Liu</surname>
                            <given-names>F</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Magusin</surname>
                            <given-names>A</given-names>
                        </name>
						
                        <etal/>
					</person-group>:
                    <article-title>Cold-induced silencing by long antisense transcripts of an 
                        <italic toggle="yes">Arabidopsis</italic> Polycomb target.</article-title>
                    <source>
						
                        <italic toggle="yes">Nature.</italic>
					</source>
                    <year>2009</year>;<volume>462</volume>(<issue>7274</issue>):<fpage>799</fpage>&#x2013;<lpage>802</lpage>.
                    <pub-id pub-id-type="pmid">20010688</pub-id>
                    <pub-id pub-id-type="doi">10.1038/nature08618</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-6">
                <label>6</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Liu</surname>
                            <given-names>F</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Marquardt</surname>
                            <given-names>S</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Lister</surname>
                            <given-names>C</given-names>
                        </name>
						
                        <etal/>
					</person-group>:
                    <article-title>Targeted 3&#x2032; processing of antisense transcripts triggers 
                        <italic toggle="yes">Arabidopsis FLC</italic> chromatin silencing.</article-title>
                    <source>
						
                        <italic toggle="yes">Science.</italic>
					</source>
                    <year>2010</year>;<volume>327</volume>(<issue>5961</issue>):<fpage>94</fpage>&#x2013;<lpage>97</lpage>.
                    <pub-id pub-id-type="pmid">19965720</pub-id>
                    <pub-id pub-id-type="doi">10.1126/science.1180278</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-7">
                <label>7</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Heo</surname>
                            <given-names>JB</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Sung</surname>
                            <given-names>S</given-names>
                        </name>
					</person-group>:
                    <article-title>Vernalization-mediated epigenetic silencing by a long intronic noncoding RNA.</article-title>
                    <source>
						
                        <italic toggle="yes">Science.</italic>
					</source>
                    <year>2011</year>;<volume>331</volume>(<issue>6013</issue>):<fpage>76</fpage>&#x2013;<lpage>79</lpage>.
                    <pub-id pub-id-type="pmid">21127216</pub-id>
                    <pub-id pub-id-type="doi">10.1126/science.1197349</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-8">
                <label>8</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Ietswaart</surname>
                            <given-names>R</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Wu</surname>
                            <given-names>Z</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Dean</surname>
                            <given-names>C</given-names>
                        </name>
					</person-group>:
                    <article-title>Flowering time control: another window to the connection between antisense RNA and chromatin.</article-title>
                    <source>
						
                        <italic toggle="yes">Trends Genet.</italic>
					</source>
                    <year>2012</year>;<volume>28</volume>(<issue>9</issue>):<fpage>445</fpage>&#x2013;<lpage>453</lpage>.
                    <pub-id pub-id-type="pmid">22785023</pub-id>
                    <pub-id pub-id-type="doi">10.1016/j.tig.2012.06.002</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-9">
                <label>9</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Hobson</surname>
                            <given-names>DJ</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Wei</surname>
                            <given-names>W</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Steinmetz</surname>
                            <given-names>LM</given-names>
                        </name>
						
                        <etal/>
					</person-group>:
                    <article-title>RNA polymerase II collision interrupts convergent transcription.</article-title>
                    <source>
						
                        <italic toggle="yes">Mol Cell.</italic>
					</source>
                    <year>2012</year>;<volume>48</volume>(<issue>3</issue>):<fpage>365</fpage>&#x2013;<lpage>374</lpage>.
                    <pub-id pub-id-type="pmid">23041286</pub-id>
                    <pub-id pub-id-type="doi">10.1016/j.molcel.2012.08.027</pub-id>
                    <pub-id pub-id-type="pmcid">3504299</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-10">
                <label>10</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Crampton</surname>
                            <given-names>N</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Bonass</surname>
                            <given-names>WA</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Kirkham</surname>
                            <given-names>J</given-names>
                        </name>
						
                        <etal/>
					</person-group>:
                    <article-title>Collision events between RNA polymerases in convergent transcription studied by atomic force microscopy.</article-title>
                    <source>
						
                        <italic toggle="yes">Nucleic Acids Res.</italic>
					</source>
                    <year>2006</year>;<volume>34</volume>(<issue>19</issue>):<fpage>5416</fpage>&#x2013;<lpage>5425</lpage>.
                    <pub-id pub-id-type="pmid">17012275</pub-id>
                    <pub-id pub-id-type="doi">10.1093/nar/gkl668</pub-id>
                    <pub-id pub-id-type="pmcid">1636470</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-11">
                <label>11</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Onodera</surname>
                            <given-names>CS</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Underwood</surname>
                            <given-names>JG</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Katzman</surname>
                            <given-names>S</given-names>
                        </name>
						
                        <etal/>
					</person-group>:
                    <article-title>Gene isoform specificity through enhancer-associated antisense transcription.</article-title>
                    <source>
						
                        <italic toggle="yes">PLoS One.</italic>
					</source>
                    <year>2012</year>;<volume>7</volume>(<issue>8</issue>):<fpage>e43511</fpage>.
                    <pub-id pub-id-type="pmid">22937057</pub-id>
                    <pub-id pub-id-type="doi">10.1371/journal.pone.0043511</pub-id>
                    <pub-id pub-id-type="pmcid">3427357</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-12">
                <label>12</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Kawano</surname>
                            <given-names>M</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Aravind</surname>
                            <given-names>L</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Storz</surname>
                            <given-names>G</given-names>
                        </name>
					</person-group>:
                    <article-title>An antisense RNA controls synthesis of an SOS-induced toxin evolved from an antitoxin.</article-title>
                    <source>
						
                        <italic toggle="yes">Mol Microbiol.</italic>
					</source>
                    <year>2007</year>;<volume>64</volume>(<issue>3</issue>):<fpage>738</fpage>&#x2013;<lpage>754</lpage>.
                    <pub-id pub-id-type="pmid">17462020</pub-id>
                    <pub-id pub-id-type="doi">10.1111/j.1365-2958.2007.05688.x</pub-id>
                    <pub-id pub-id-type="pmcid">1891008</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-13">
                <label>13</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Tufarelli</surname>
                            <given-names>C</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Stanley</surname>
                            <given-names>JA</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Garrick</surname>
                            <given-names>D</given-names>
                        </name>
						
                        <etal/>
					</person-group>:
                    <article-title>Transcription of antisense RNA leading to gene silencing and methylation as a novel cause of human genetic disease.</article-title>
                    <source>
						
                        <italic toggle="yes">Nat Genet.</italic>
					</source>
                    <year>2003</year>;<volume>34</volume>(<issue>2</issue>):<fpage>157</fpage>&#x2013;<lpage>165</lpage>.
                    <pub-id pub-id-type="pmid">12730694</pub-id>
                    <pub-id pub-id-type="doi">10.1038/ng1157</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-14">
                <label>14</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Sherstnev</surname>
                            <given-names>A</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Duc</surname>
                            <given-names>C</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Cole</surname>
                            <given-names>C</given-names>
                        </name>
						
                        <etal/>
					</person-group>:
                    <article-title>Direct sequencing of 
                        <italic toggle="yes">Arabidopsis thaliana</italic> RNA reveals patterns of cleavage and polyadenylation.</article-title>
                    <source>
						
                        <italic toggle="yes">Nat Struct Mol Biol.</italic>
					</source>
                    <year>2012</year>;<volume>19</volume>(<issue>8</issue>):<fpage>845</fpage>&#x2013;<lpage>852</lpage>.
                    <pub-id pub-id-type="pmid">22820990</pub-id>
                    <pub-id pub-id-type="doi">10.1038/nsmb.2345</pub-id>
                    <pub-id pub-id-type="pmcid">3533403</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-15">
                <label>15</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Beiter</surname>
                            <given-names>T</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Reich</surname>
                            <given-names>E</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Weigert</surname>
                            <given-names>C</given-names>
                        </name>
						
                        <etal/>
					</person-group>:
                    <article-title>Sense or antisense? False priming reverse transcription controls are required for determining sequence orientation by reverse transcription-PCR.</article-title>
                    <source>
						
                        <italic toggle="yes">Anal Biochem.</italic>
					</source>
                    <year>2007</year>;<volume>369</volume>(<issue>2</issue>):<fpage>258</fpage>&#x2013;<lpage>261</lpage>.
                    <pub-id pub-id-type="pmid">17698025</pub-id>
                    <pub-id pub-id-type="doi">10.1016/j.ab.2007.06.044</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-16">
                <label>16</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Haddad</surname>
                            <given-names>F</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Qin</surname>
                            <given-names>AX</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Giger</surname>
                            <given-names>JM</given-names>
                        </name>
						
                        <etal/>
					</person-group>:
                    <article-title>Potential pitfalls in the accuracy of analysis of natural sense-antisense RNA pairs by reverse transcription-PCR.</article-title>
                    <source>
						
                        <italic toggle="yes">BMC Biotechnol.</italic>
					</source>
                    <year>2007</year>;<volume>7</volume>:<fpage>21</fpage>.
                    <pub-id pub-id-type="pmid">17480233</pub-id>
                    <pub-id pub-id-type="doi">10.1186/1472-6750-7-21</pub-id>
                    <pub-id pub-id-type="pmcid">1876213</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-17">
                <label>17</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Perocchi</surname>
                            <given-names>F</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Xu</surname>
                            <given-names>Z</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Clauder-M&#x00fc;nster</surname>
                            <given-names>S</given-names>
                        </name>
						
                        <etal/>
					</person-group>:
                    <article-title>Antisense artifacts in transcriptome microarray experiments are resolved by actinomycin D.</article-title>
                    <source>
						
                        <italic toggle="yes">Nucleic Acids Res.</italic>
					</source>
                    <year>2007</year>;<volume>35</volume>(<issue>19</issue>):<fpage>e128</fpage>.
                    <pub-id pub-id-type="pmid">17897965</pub-id>
                    <pub-id pub-id-type="doi">10.1093/nar/gkm683</pub-id>
                    <pub-id pub-id-type="pmcid">2095812</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-18">
                <label>18</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Thomason</surname>
                            <given-names>MK</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Storz</surname>
                            <given-names>G</given-names>
                        </name>
					</person-group>:
                    <article-title>Bacterial antisense RNAs: how many are there, and what are they doing?</article-title>
                    <source>
						
                        <italic toggle="yes">Annu Rev Genet.</italic>
					</source>
                    <year>2010</year>;<volume>44</volume>:<fpage>167</fpage>&#x2013;<lpage>188</lpage>.
                    <pub-id pub-id-type="pmid">20707673</pub-id>
                    <pub-id pub-id-type="doi">10.1146/annurev-genet-102209-163523</pub-id>
                    <pub-id pub-id-type="pmcid">3030471</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-19">
                <label>19</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Tzadok</surname>
                            <given-names>S</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Caspin</surname>
                            <given-names>Y</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Hachmo</surname>
                            <given-names>Y</given-names>
                        </name>
						
                        <etal/>
					</person-group>:
                    <article-title>Directionality of noncoding human RNAs: how to avoid artifacts.</article-title>
                    <source>
						
                        <italic toggle="yes">Anal Biochem.</italic>
					</source>
                    <year>2013</year>;<volume>439</volume>(<issue>1</issue>):<fpage>23</fpage>&#x2013;<lpage>29</lpage>.
                    <pub-id pub-id-type="pmid">23583907</pub-id>
                    <pub-id pub-id-type="doi">10.1016/j.ab.2013.03.031</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-20">
                <label>20</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Yu</surname>
                            <given-names>WH</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>H&#x00f8;vik</surname>
                            <given-names>H</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Olsen</surname>
                            <given-names>I</given-names>
                        </name>
						
                        <etal/>
					</person-group>:
                    <article-title>Strand-specific transcriptome profiling with directly labeled RNA on genomic tiling microarrays.</article-title>
                    <source>
						
                        <italic toggle="yes">BMC Mol Biol.</italic>
					</source>
                    <year>2011</year>;<volume>12</volume>(<issue>1</issue>):<fpage>3</fpage>.
                    <pub-id pub-id-type="pmid">21235785</pub-id>
                    <pub-id pub-id-type="doi">10.1186/1471-2199-12-3</pub-id>
                    <pub-id pub-id-type="pmcid">3031212</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-21">
                <label>21</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Houseley</surname>
                            <given-names>J</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Tollervey</surname>
                            <given-names>D</given-names>
                        </name>
					</person-group>:
                    <article-title>Apparent non-canonical trans-splicing is generated by reverse transcriptase 
                        <italic toggle="yes">in vitro</italic>.</article-title>
                    <source>
						
                        <italic toggle="yes">PLoS One.</italic>
					</source>
                    <year>2010</year>;<volume>5</volume>(<issue>8</issue>):<fpage>e12271</fpage>.
                    <pub-id pub-id-type="pmid">20805885</pub-id>
                    <pub-id pub-id-type="doi">10.1371/journal.pone.0012271</pub-id>
                    <pub-id pub-id-type="pmcid">2923612</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-22">
                <label>22</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>van Dijk</surname>
                            <given-names>E</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Jaszczyszyn</surname>
                            <given-names>Y</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Thermes</surname>
                            <given-names>C</given-names>
                        </name>
					</person-group>:
                    <article-title>Library preparation methods for next-generation sequencing: tone down the bias.</article-title>
                    <source>
						
                        <italic toggle="yes">Exp Cell Res.</italic>
					</source>
                    <year>2014</year>;<volume>322</volume>(<issue>1</issue>):<fpage>12</fpage>&#x2013;<lpage>20</lpage>.
                    <pub-id pub-id-type="pmid">24440557</pub-id>
                    <pub-id pub-id-type="doi">10.1016/j.yexcr.2014.01.008</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-23">
                <label>23</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Levin</surname>
                            <given-names>JZ</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Yassour</surname>
                            <given-names>M</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Adiconis</surname>
                            <given-names>X</given-names>
                        </name>
						
                        <etal/>
					</person-group>:
                    <article-title>Comprehensive comparative analysis of strand-specific RNA sequencing methods.</article-title>
                    <source>
						
                        <italic toggle="yes">Nat Methods.</italic>
					</source>
                    <year>2010</year>;<volume>7</volume>(<issue>9</issue>):<fpage>709</fpage>&#x2013;<lpage>715</lpage>.
                    <pub-id pub-id-type="pmid">20711195</pub-id>
                    <pub-id pub-id-type="doi">10.1038/nmeth.1491</pub-id>
                    <pub-id pub-id-type="pmcid">3005310</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-24">
                <label>24</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Parkhomchuk</surname>
                            <given-names>D</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Borodina</surname>
                            <given-names>T</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Amstislavskiy</surname>
                            <given-names>V</given-names>
                        </name>
						
                        <etal/>
					</person-group>:
                    <article-title>Transcriptome analysis by strand-specific sequencing of complementary DNA.</article-title>
                    <source>
						
                        <italic toggle="yes">Nucleic Acids Res.</italic>
					</source>
                    <year>2009</year>;<volume>37</volume>(<issue>18</issue>):<fpage>e123</fpage>.
                    <pub-id pub-id-type="pmid">19620212</pub-id>
                    <pub-id pub-id-type="doi">10.1093/nar/gkp596</pub-id>
                    <pub-id pub-id-type="pmcid">2764448</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-25">
                <label>25</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Jiang</surname>
                            <given-names>L</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Schlesinger</surname>
                            <given-names>F</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Davis</surname>
                            <given-names>CA</given-names>
                        </name>
						
                        <etal/>
					</person-group>:
                    <article-title>Synthetic spike-in standards for RNA-seq experiments.</article-title>
                    <source>
						
                        <italic toggle="yes">Genome Res.</italic>
					</source>
                    <year>2011</year>;<volume>21</volume>(<issue>9</issue>):<fpage>1543</fpage>&#x2013;<lpage>1551</lpage>.
                    <pub-id pub-id-type="pmid">21816910</pub-id>
                    <pub-id pub-id-type="doi">10.1101/gr.121095.111</pub-id>
                    <pub-id pub-id-type="pmcid">3166838</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-26">
                <label>26</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Zeng</surname>
                            <given-names>W</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Mortazavi</surname>
                            <given-names>A</given-names>
                        </name>
					</person-group>:
                    <article-title>Technical considerations for functional sequencing assays.</article-title>
                    <source>
						
                        <italic toggle="yes">Nat Immunol.</italic>
					</source>
                    <year>2012</year>;<volume>13</volume>(<issue>9</issue>):<fpage>802</fpage>&#x2013;<lpage>807</lpage>.
                    <pub-id pub-id-type="pmid">22910383</pub-id>
                    <pub-id pub-id-type="doi">10.1038/ni.2407</pub-id>
                    <pub-id pub-id-type="pmcid">4138050</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-27">
                <label>27</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Garalde</surname>
                            <given-names>DR</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Snell</surname>
                            <given-names>EA</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Jachimowicz</surname>
                            <given-names>D</given-names>
                        </name>
						
                        <etal/>
					</person-group>:
                    <article-title>Highly parallel direct RNA sequencing on an array of nanopores.</article-title>
                    <source>
						
                        <italic toggle="yes">Nat Methods.</italic>
					</source>
                    <year>2018</year>;<volume>15</volume>(<issue>3</issue>):<fpage>201</fpage>&#x2013;<lpage>206</lpage>.
                    <pub-id pub-id-type="pmid">29334379</pub-id>
                    <pub-id pub-id-type="doi">10.1038/nmeth.4577</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-28">
                <label>28</label>
                <mixed-citation publication-type="journal">
                    <collab>ENCODE Project Consortium</collab>:
                    <article-title>An integrated encyclopedia of DNA elements in the human genome.</article-title>
                    <source>
						
                        <italic toggle="yes">Nature.</italic>
					</source>
                    <year>2012</year>;<volume>489</volume>(<issue>7414</issue>):<fpage>57</fpage>&#x2013;<lpage>74</lpage>.
                    <pub-id pub-id-type="pmid">22955616</pub-id>
                    <pub-id pub-id-type="doi">10.1038/nature11247</pub-id>
                    <pub-id pub-id-type="pmcid">3439153</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-29">
                <label>29</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Tarasov</surname>
                            <given-names>A</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Vilella</surname>
                            <given-names>AJ</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Cuppen</surname>
                            <given-names>E</given-names>
                        </name>
						
                        <etal/>
					</person-group>:
                    <article-title>Sambamba: fast processing of NGS alignment formats.</article-title>
                    <source>
						
                        <italic toggle="yes">Bioinformatics.</italic>
					</source>
                    <year>2015</year>;<volume>31</volume>(<issue>12</issue>):<fpage>2032</fpage>&#x2013;<lpage>2034</lpage>.
                    <pub-id pub-id-type="pmid">25697820</pub-id>
                    <pub-id pub-id-type="doi">10.1093/bioinformatics/btv098</pub-id>
                    <pub-id pub-id-type="pmcid">4765878</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-30">
                <label>30</label>
                <mixed-citation publication-type="journal">
                    <collab>The SAM/BAM Format Specification Working Group</collab>:
                    <article-title>Sequence Alignment/Map Format Specification</article-title>.<year>2017</year>.
                    <ext-link ext-link-type="uri" xlink:href="https://samtools.github.io/hts-specs/SAMv1.pdf">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref-31">
                <label>31</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Winters-Hilt</surname>
                            <given-names>S</given-names>
                        </name>
						</person-group>:
                    <article-title>RNA-Dependent RNA Polymerase encoding Artifacts in Eukaryotic Transcriptomes.</article-title>
                    <source>
						
                        <italic toggle="yes">Int J Mol Genet Gene Ther.</italic>
					</source>
                    <year>2017</year>;<volume>2</volume>(<issue>1</issue>).
                    <pub-id pub-id-type="doi">10.16966/2471-4968.108</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-32">
                <label>32</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Freese</surname>
                            <given-names>NH</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Norris</surname>
                            <given-names>DC</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Loraine</surname>
                            <given-names>AE</given-names>
                        </name>
					</person-group>:
                    <article-title>Integrated genome browser: visual analytics platform for genomics.</article-title>
                    <source>
						
                        <italic toggle="yes">Bioinformatics.</italic>
					</source>
                    <year>2016</year>;<volume>32</volume>(<issue>14</issue>):<fpage>2089</fpage>&#x2013;<lpage>2095</lpage>.
                    <pub-id pub-id-type="pmid">27153568</pub-id>
                    <pub-id pub-id-type="doi">10.1093/bioinformatics/btw069</pub-id>
                    <pub-id pub-id-type="pmcid">4937187</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-33">
                <label>33</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Iyer</surname>
                            <given-names>LM</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Koonin</surname>
                            <given-names>EV</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Aravind</surname>
                            <given-names>L</given-names>
                        </name>
					</person-group>:
                    <article-title>Evolutionary connection between the catalytic subunits of DNA-dependent RNA polymerases and eukaryotic RNA-dependent RNA polymerases and the origin of RNA polymerases.</article-title>
                    <source>
						
                        <italic toggle="yes">BMC Struct Biol.</italic>
					</source>
                    <year>2003</year>;<volume>3</volume>(<issue>1</issue>):<fpage>1</fpage>.
                    <pub-id pub-id-type="pmid">12553882</pub-id>
                    <pub-id pub-id-type="doi">10.1186/1472-6807-3-1</pub-id>
                    <pub-id pub-id-type="pmcid">151600</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-34">
                <label>34</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Pinz&#x00f3;n</surname>
                            <given-names>N</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Bertrand</surname>
                            <given-names>S</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Subirana</surname>
                            <given-names>L</given-names>
                        </name>
						
                        <etal/>
					</person-group>:
                    <article-title>Functional lability of RNA-dependent RNA polymerases in animals.</article-title>
                    <source>
						
                        <italic toggle="yes">bioRxiv.</italic>
					</source>
                    <year>2018</year>.
                    <pub-id pub-id-type="doi">10.1101/339820</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-35">
                <label>35</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Baker</surname>
                            <given-names>SC</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Bauer</surname>
                            <given-names>SR</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Beyer</surname>
                            <given-names>RP</given-names>
                        </name>
						
                        <etal/>
					</person-group>:
                    <article-title>The External RNA Controls Consortium: a progress report.</article-title>
                    <source>
						
                        <italic toggle="yes">Nat Methods.</italic>
					</source>
                    <year>2005</year>;<volume>2</volume>(<issue>10</issue>):<fpage>731</fpage>&#x2013;<lpage>734</lpage>.
                    <pub-id pub-id-type="pmid">16179916</pub-id>
                    <pub-id pub-id-type="doi">10.1038/nmeth1005-731</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-36">
                <label>36</label>
                <mixed-citation publication-type="journal">
                    <collab>ERCC</collab>:
                    <article-title>NIST standard reference material 2374</article-title>.<year>2017</year>.
                    <ext-link ext-link-type="uri" xlink:href="https://www.nist.gov/sites/default/files/documents/2016/09/26/2374_coa_2013.pdf">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref-37">
                <label>37</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Liao</surname>
                            <given-names>Y</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Smyth</surname>
                            <given-names>GK</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Shi</surname>
                            <given-names>W</given-names>
                        </name>
					</person-group>:
                    <article-title>featureCounts: an efficient general purpose program for assigning sequence reads to genomic features.</article-title>
                    <source>
						
                        <italic toggle="yes">Bioinformatics.</italic>
					</source>
                    <year>2014</year>;<volume>30</volume>(<issue>7</issue>):<fpage>923</fpage>&#x2013;<lpage>930</lpage>.
                    <pub-id pub-id-type="pmid">24227677</pub-id>
                    <pub-id pub-id-type="doi">10.1093/bioinformatics/btt656</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-38">
                <label>38</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Anders</surname>
                            <given-names>S</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Pyl</surname>
                            <given-names>PT</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Huber</surname>
                            <given-names>W</given-names>
                        </name>
					</person-group>:
                    <article-title>HTSeq&#x2014;a Python framework to work with high-throughput sequencing data.</article-title>
                    <source>
						
                        <italic toggle="yes">Bioinformatics.</italic>
					</source>
                    <year>2015</year>;<volume>31</volume>(<issue>2</issue>):<fpage>166</fpage>&#x2013;<lpage>169</lpage>.
                    <pub-id pub-id-type="pmid">25260700</pub-id>
                    <pub-id pub-id-type="doi">10.1093/bioinformatics/btu638</pub-id>
                    <pub-id pub-id-type="pmcid">4287950</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-39">
                <label>39</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Froussios</surname>
                            <given-names>K</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Schurch</surname>
                            <given-names>NJ</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Mackinnon</surname>
                            <given-names>K</given-names>
                        </name>
						
                        <etal/>
					</person-group>:
                    <article-title>How well do RNA-Seq differential gene expression tools perform in a eukaryote with a complex transcriptome?</article-title>
                    <source>
						
                        <italic toggle="yes">bioRxiv.</italic>
					</source>
                    <year>2017</year>.
                    <pub-id pub-id-type="doi">10.1101/090753</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-40">
                <label>40</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Andrews</surname>
                            <given-names>S</given-names>
                        </name>
					</person-group>:
                    <article-title>FastQC: A quality control tool for high throughput sequence data</article-title>.<year>2010</year>.
                    <ext-link ext-link-type="uri" xlink:href="http://www.bioinformatics.babraham.ac.uk/projects/fastqc">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref-41">
                <label>41</label>
                <mixed-citation publication-type="journal">
                    <collab>Arabidopsis Genome Initiative</collab>:
                    <article-title>Analysis of the genome sequence of the flowering plant 
                        <italic toggle="yes">Arabidopsis thaliana</italic>.</article-title>
                    <source>
						
                        <italic toggle="yes">Nature.</italic>
					</source>
                    <year>2000</year>;<volume>408</volume>(<issue>6814</issue>):<fpage>796</fpage>&#x2013;<lpage>815</lpage>.
                    <pub-id pub-id-type="pmid">11130711</pub-id>
                    <pub-id pub-id-type="doi">10.1038/35048692</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-42">
                <label>42</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Dobin</surname>
                            <given-names>A</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Davis</surname>
                            <given-names>CA</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Schlesinger</surname>
                            <given-names>F</given-names>
                        </name>
						
                        <etal/>
					</person-group>:
                    <article-title>STAR: ultrafast universal RNA-seq aligner.</article-title>
                    <source>
						
                        <italic toggle="yes">Bioinformatics.</italic>
					</source>
                    <year>2013</year>;<volume>29</volume>(<issue>1</issue>):<fpage>15</fpage>&#x2013;<lpage>21</lpage>.
                    <pub-id pub-id-type="pmid">23104886</pub-id>
                    <pub-id pub-id-type="doi">10.1093/bioinformatics/bts635</pub-id>
                    <pub-id pub-id-type="pmcid">3530905</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-43">
                <label>43</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Schurch</surname>
                            <given-names>N</given-names>
                        </name>
					</person-group>:
                    <article-title>bartongroup/RoSA: Initial (Version v1.0).</article-title>
                    <source>
						
                        <italic toggle="yes">Zenodo.</italic>
					</source>
                    <year>2019</year>.
                    <ext-link ext-link-type="uri" xlink:href="http://www.doi.org/10.5281/zenodo.2661378">http://www.doi.org/10.5281/zenodo.2661378</ext-link>
                </mixed-citation>
            </ref>
        </ref-list>
    </back>
    <sub-article article-type="reviewer-report" id="report52108">
        <front-stub>
            <article-id pub-id-type="doi">10.5256/f1000research.20776.r52108</article-id>
            <title-group>
                <article-title>Reviewer response for version 1</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author">
                    <name>
                        <surname>Werner</surname>
                        <given-names>Andreas</given-names>
                    </name>
                    <xref ref-type="aff" rid="r52108a2">2</xref>
                    <role>Referee</role>
                    <uri content-type="orcid">https://orcid.org/0000-0002-2274-0662</uri>
                </contrib>
                <contrib contrib-type="author">
                    <name>
                        <surname>Casement</surname>
                        <given-names>John</given-names>
                    </name>
                    <xref ref-type="aff" rid="r52108a1">1</xref>
                    <role>Co-referee</role>
                </contrib>
                <aff id="r52108a1">
                    <label>1</label>Bioinformatics Support Unit, Faculty of Medical Sciences, Newcastle University, Newcastle, UK</aff>
                <aff id="r52108a2">
                    <label>2</label>RNA Interest Group, Institute for Cell and Molecular Biosciences, Newcastle University, Newcastle upon Tyne, UK</aff>
            </contrib-group>
            <author-notes>
                <fn fn-type="conflict">
                    <p>
                        <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>28</day>
                <month>8</month>
                <year>2019</year>
            </pub-date>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2019 Casement J and Werner A</copyright-statement>
                <copyright-year>2019</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access peer review report distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <related-article ext-link-type="doi" id="relatedArticleReport52108" related-article-type="peer-reviewed-article" xlink:href="10.12688/f1000research.18952.1"/>
            <custom-meta-group>
                <custom-meta>
                    <meta-name>recommendation</meta-name>
                    <meta-value>approve-with-reservations</meta-value>
                </custom-meta>
            </custom-meta-group>
        </front-stub>
        <body>
            <p>The authors address an important question, i.e. detection of real and experimentally generated antisense transcripts in strand-specific RNAseq datasets. With their software tool RoSA, they present a valid strategy to solve this problem. However, there are a number of concerns regarding the manuscript and the software.</p>
            <p> 
                <underline>
                    <bold>Manuscript:</bold>
                </underline> 
                <list list-type="bullet">
                    <list-item>
                        <p>Introduction: The authors discuss the mechanisms of how antisense transcripts can regulate/interfere with the expression of the sense gene. The sentence &#x2018;Post-transcriptionally, antisense transcripts can compete with sense transcripts for binding sites&#x2019;, may be applicable in bacteria, but is misleading when studying eukaryotic systems. Post-transcriptional mechanisms involve RNAi, RNA editing or RNA-protein complexes but hardly competition for binding sites with the sense transcript. Moreover, the example given in the text (research by Tufarelli 
                            <italic>et al.</italic>
                            <sup>
                                <xref ref-type="bibr" rid="rep-ref-52108-1">1</xref>
                            </sup>) is a prime example of transcriptional interference, not of post-transcriptional regulation.</p>
                    </list-item>
                    <list-item>
                        <p>Introduction: &#x2018;viral-dependent&#x2019; should read &#x2018;virus-dependent&#x2019;.</p>
                    </list-item>
                    <list-item>
                        <p>2.2 Using spliced reads: The fact that RoSA cannot distinguish between spurious antisense reads and RdRP-generated transcripts is only relevant if datasets from plants and 
                            <italic>C. elegans</italic> (which express significant levels of RdRP) are interrogated. The issue becomes critical if particular loci affected by RdRP amplification contribute disproportionately to a genome-wide correction factor. In line with reviewer 1, it is unclear how an open reading frame affects RNA-dependent RNA polymerization.</p>
                    </list-item>
                    <list-item>
                        <p>2.4 Mitigating spurious antisense: &#x2018;Since the spurious antisense reads are miss-assigned sense reads, RoSA then adds the spurious antisense count for each gene to its sense count&#x2019;. Adding the spurious antisense counts to the sense counts assumes that reverse transcriptase errors happen at the cost of &#x2018;real&#x2019; RNA -&gt; cDNA processing. Is this correct?</p>
                    </list-item>
                </list> &#x00a0;
                <underline>
                    <bold>Software:</bold>
                </underline> 
                <list list-type="bullet">
                    <list-item>
                        <p>The package as it stands is not user friendly and has several bugs that need to be resolved before the software can be used smoothly.</p>
                    </list-item>
                    <list-item>
                        <p>Installation of the Python package using the pip installer worked without difficulties, as did the R package installation.</p>
                    </list-item>
                    <list-item>
                        <p>Requesting help at the command-line (via the -h option) for the Python scripts 'make_annotation' and 'count_spliced' had the side-effect of creating an empty log file each time.</p>
                    </list-item>
                    <list-item>
                        <p>Running the 'count_spliced' script over a modestly sized alignment file (around 0.5 GB) resulted in over 2000 log files being produced, each detailing the locations at which spliced reads were being counted. This may be related to running the script without drmaa (in serial mode), as a message to that effect was repeatedly printed to the terminal during operation.</p>
                    </list-item>
                    <list-item>
                        <p>The Python scripts can be run as R functions, but the help documentation is not available within R.</p>
                    </list-item>
                    <list-item>
                        <p>A detailed R package vignette including sample data sets and examples of code would be very helpful, especially for the less experienced users.</p>
                    </list-item>
                    <list-item>
                        <p>General comment: Is there a reason why the authors used a storage-costly strategy of aligning reads followed by counting, rather than alignment-free software such as Kallisto or Salmon?</p>
                    </list-item>
                </list>
            </p>
            <p>Are the conclusions about the tool and its performance adequately supported by the findings presented in the article?</p>
            <p>Partly</p>
            <p>Is the rationale for developing the new software tool clearly explained?</p>
            <p>Yes</p>
            <p>Is the description of the software tool technically sound?</p>
            <p>Yes</p>
            <p>Are sufficient details of the code, methods and analysis (if applicable) provided to allow replication of the software development and its use by others?</p>
            <p>Partly</p>
            <p>Is sufficient information provided to allow interpretation of the expected output datasets and any results generated using the tool?</p>
            <p>Partly</p>
            <p>Reviewer Expertise:</p>
            <p>Andreas Werner: Molecular biology, natural antisense transcripts. John Casement: Bioinformatics, RNAseq analysis.</p>
            <p>We confirm that we have read this submission and believe that we have an appropriate level of expertise to confirm that it is of an acceptable scientific standard, however we have significant reservations, as outlined above.</p>
        </body>
        <back>
            <ref-list>
                <title>References</title>
                <ref id="rep-ref-52108-1">
                    <label>1</label>
                    <mixed-citation publication-type="journal">
                        <person-group person-group-type="author">
                            <name>
                                <surname>Tufarelli</surname>
                                <given-names>C</given-names>
                            </name>
                            <etal/>
                        </person-group>:
                        <article-title>Transcription of antisense RNA leading to gene silencing and methylation as a novel cause of human genetic disease.</article-title>
                        <source>
                            <italic>Nat Genet</italic>
                        </source>.<year>2003</year>;<volume>34</volume>(<issue>2</issue>):<fpage>157</fpage>&#x2013;<lpage>165</lpage>.
                        <pub-id pub-id-type="pmid">12730694</pub-id>
                        <pub-id pub-id-type="doi">10.1038/ng1157</pub-id>
                    </mixed-citation>
                </ref>
            </ref-list>
        </back>
    </sub-article>
    <sub-article article-type="reviewer-report" id="report49670">
        <front-stub>
            <article-id pub-id-type="doi">10.5256/f1000research.20776.r49670</article-id>
            <title-group>
                <article-title>Reviewer response for version 1</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author">
                    <name>
                        <surname>Sudbery</surname>
                        <given-names>Ian</given-names>
                    </name>
                    <xref ref-type="aff" rid="r49670a1">1</xref>
                    <role>Referee</role>
                    <uri content-type="orcid">https://orcid.org/0000-0002-5038-0190</uri>
                </contrib>
                <aff id="r49670a1">
                    <label>1</label>Sheffield Institute For Nucleic Acids (SInFoNiA), Department of Molecular Biology and Biotechnology, University of Sheffield, Sheffield, UK</aff>
            </contrib-group>
            <author-notes>
                <fn fn-type="conflict">
                    <p>
                        <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>2</day>
                <month>7</month>
                <year>2019</year>
            </pub-date>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2019 Sudbery I</copyright-statement>
                <copyright-year>2019</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access peer review report distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <related-article ext-link-type="doi" id="relatedArticleReport49670" related-article-type="peer-reviewed-article" xlink:href="10.12688/f1000research.18952.1"/>
            <custom-meta-group>
                <custom-meta>
                    <meta-name>recommendation</meta-name>
                    <meta-value>approve-with-reservations</meta-value>
                </custom-meta>
            </custom-meta-group>
        </front-stub>
        <body>
            <p>Mourao 
                <italic>et al.</italic> describe a package for estimation of the rate at which stranding information in an RNA-seq sample might be wrong, thus leading to inaccurate quantification of anti-sense transcription, which they apply to their own data and the data of others. This is a fascinating and useful contribution to RNA-seq analysis. The software described should be useful to a range of scientists working on the analysis of RNA biology from transcriptomics data. I will split my evaluation of this into two sections &#x2013; the manuscript/science, and the software:</p>
            <p> 
                <bold>Manuscript:</bold>
            </p>
            <p> </p>
            <p> The manuscript is generally well written and describes an interesting problem. I believe that the approach is generally sound. I have a few small issues that the authors might like to address: 
                <list list-type="bullet">
                    <list-item>
                        <p>It is assumed that anti-sense spliced reads are spurious. However, one possibility is that anti-sense spliced reads arise by the action of RNA-dependent RNA-polymerases. The authors acknowledge this point, but state &#x201c;only 8-30% of eukaryotic gene regions have significant length ORFs on their opposite strands, providing an upper limit on the potential impact of this method of transcript on the RNA complement within a cell&#x201d;.&#x00a0; I am unsure how the presence of an ORF on the anti-sense strand has an effect on the RNA complement of a cell. The authors should clarify this argument.</p>
                    </list-item>
                    <list-item>
                        <p>An underlying assumption of the approach is that the ratio of spurious anti-sense reads to sense reads is consistent across the length of the transcript, and thus modelled by the reverse junction reads. This assumption is probably necessary and reasonable, but perhaps the authors might make it explicit.</p>
                    </list-item>
                    <list-item>
                        <p>In regions where two annotated transcripts overlap, the authors truncate their antisense copies of these transcripts to only cover the region that is unique to the sense transcript in question. This is necessary to avoid double counting of reads. However, it does mean that the number of reads counted as antisense will be an underestimate. This becomes problematic because the full length transcript, not the truncated version, is used when calculating how many spurious antisense reads are present (this is possible because this is calculated from spliced reads). Thus the correction factor will be overestimated compared to the counted number of antisense reads. This could be detected by finding genes where the correction factor is larger than the total number of antisense reads leading to a negative corrected value for the antisense count. Unfortunately the package truncates the corrected count at 0, so these counts are not evident in output from the package.</p>
                    </list-item>
                    <list-item>
                        <p>The authors show that there is a large variation in the proportion of anti-sense reads that are spurious in three replicates of the same experimental condition. It would be useful to know if this variation is bigger or smaller than that between conditions. Variation within replicates reduces power to detect differential expression between conditions, but systematic variation between conditions might lead to false positive calls of differential expression. The authors might consider looking at the within condition and between condition variance of samples in the Graveley data set they have analysed.</p>
                    </list-item>
                    <list-item>
                        <p>The authors introduce the concept of a &#x201c;maximal transcript&#x201d;. This is very similar to the concept of the superTranscript
                            <sup>
                                <xref ref-type="bibr" rid="rep-ref-49670-1">1</xref>
                            </sup>.&#x00a0;The authors might like to acknowledge this and cite the relevant work.</p>
                    </list-item>
                    <list-item>
                        <p>The authors note that for some genes there is more anti-sense expression than sense expression and suggest this might be a case of incorrect annotation. Do these genes tend to be spliced or single exon? If spliced, does the annotation carry the canonical splice sequence, its reverse complement, or some other sequence?</p>
                    </list-item>
                    <list-item>
                        <p>Figure 5 is reproduced at a very low resolution, such that it is not possible to read all the text on the panel marked (Right).</p>
                    </list-item>
                </list> 
                <bold>Software:</bold>
            </p>
            <p> </p>
            <p> The software is provided as an R package with a number of accompanying python modules and is available on GitHub. An archived version of the source code used to produce the figures in the paper is available on Zenodo. Dependencies are intended to be installed using a conda environment. The authors are to be commended on their good practice in this respect.</p>
            <p> </p>
            <p> However, I did have a number of issues in attempting to use the software: 
                <list list-type="bullet">
                    <list-item>
                        <p>I could not install the conda environment from the provided environment file. Running the suggested commands gave the following error:</p>
                        <p> </p>
                        <p> Solving environment: failed</p>
                        <p> ResolvePackageNotFound:</p>
                        <p> &#x00a0; - r-base==3.5.1=h4fe35fd_0</p>
                        <p> &#x00a0; - libssh2==1.8.0=h5b517e9_2</p>
                        <p> &#x00a0; - matplotlib==2.2.3=py27h8e2386c_0</p>
                        <p> </p>
                        <p> To try to fix this problem I removed the pinning on the exact build, leaving only the pinning on version number. This returned the following error:</p>
                        <p> Solving environment: failed</p>
                        <p> </p>
                        <p> UnsatisfiableError: The following specifications were found to be in conflict:</p>
                        <p> &#x00a0; - matplotlib=2.2.3</p>
                        <p> &#x00a0; - sip=4.18.1</p>
                        <p> &#x00a0; - tk=8.6.8</p>
                        <p> </p>
                        <p> I was able to install by manually creating an environment using the dependencies listed in the readme, although using the pip install for rosa downgraded the pandas install from 0.24 to 0.19.</p>
                        <p> </p>
                        <p> The ability to install all dependencies as a conda environment is very useful to the average user, and the authors should consider either fixing the supplied env file, or (ideally) creating a proper rosa conda package, so that rosa and all its dependencies (both R and python) can be installed with conda install rosa.</p>
                    </list-item>
                    <list-item>
                        <p>Once installed I found the documentation to be a little on the sparse side, and at times confusing. For example, the help for the `rosa` function states that the parameter &#x201c;data&#x201d; should be a list of dataframes with a particular list of columns. But the help for the &#x201c;group&#x201d; parameter says that it is a character vector specifying the replicates for each column in data. Thus it is unclear if replicates should be provided as a member of a list of data.frames, or as separate columns in one data.frame.</p>
                    </list-item>
                    <list-item>
                        <p>There are required arguments for matrices of spike-in counts, as well as an argument to provide the positions of spike-ins in the &#x201c;data&#x201d; matrix. It is unclear whether spike-ins should be provided using only one or both of these mechanisms, or what to do if there is no spike-in data.</p>
                    </list-item>
                    <list-item>
                        <p>The help states &#x201c;This function was written with the intention of obtaining count and group parameters from an edgeR DGEList object d, via d$counts and d$samples$group.&#x201d; But d$counts would not return a data.frame of the format specified in the help for the &#x201c;data&#x201d; argument.</p>
                    </list-item>
                    <list-item>
                        <p>The manuscript doesn&#x2019;t really mention TPMs, but the help for rosa states that TPMs are required to run the function. I believe it would not be clear to a less experienced user how to calculate a TPM for the anti-sense counts generated on the custom anti-sense annotation.</p>
                    </list-item>
                    <list-item>
                        <p>No help page is provided for either the `count_spliced` or `make_annotation` functions. Nor is much in the way of documentation provided for the python scripts these functions call.</p>
                    </list-item>
                    <list-item>
                        <p>The package has no associated vignette. I think it would be of great use to users, particularly less experienced ones, to provide a walk-through of the analysis of a dataset from start to finish, with a simplified example dataset.</p>
                    </list-item>
                </list> Overall, I think that this work is novel, interesting, useful and basically sound. Before I can recommend approval, I would need to see minimally that the issues identified with installation and documentation have been dealt with. I would also be interested in the authors&#x2019; replies to my comments on the manuscript itself.</p>
            <p>Are the conclusions about the tool and its performance adequately supported by the findings presented in the article?</p>
            <p>Yes</p>
            <p>Is the rationale for developing the new software tool clearly explained?</p>
            <p>Yes</p>
            <p>Is the description of the software tool technically sound?</p>
            <p>Yes</p>
            <p>Are sufficient details of the code, methods and analysis (if applicable) provided to allow replication of the software development and its use by others?</p>
            <p>Partly</p>
            <p>Is sufficient information provided to allow interpretation of the expected output datasets and any results generated using the tool?</p>
            <p>Partly</p>
            <p>Reviewer Expertise:</p>
            <p>I am a lecturer in bioinformatics. My research focuses on using the tools for computational biology and functional genomics to study RNA biology and regulation of gene expression, particularly at the post transcriptional level.</p>
            <p>I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard, however I have significant reservations, as outlined above.</p>
        </body>
        <back>
            <ref-list>
                <title>References</title>
                <ref id="rep-ref-49670-1">
                    <label>1</label>
                    <mixed-citation publication-type="journal">
                        <person-group person-group-type="author"/>:
                        <article-title>SuperTranscripts: a data driven reference for analysis and visualisation of transcriptomes.</article-title>
                        <source>
                            <italic>Genome Biol</italic>
                        </source>.<year>2017</year>;<volume>18</volume>(<issue>1</issue>) :
                        <elocation-id>10.1186/s13059-017-1284-1</elocation-id>
                        <fpage>148</fpage>
                        <pub-id pub-id-type="pmid">28778180</pub-id>
                        <pub-id pub-id-type="doi">10.1186/s13059-017-1284-1</pub-id>
                    </mixed-citation>
                </ref>
            </ref-list>
        </back>
    </sub-article>
</article>
