<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.2 20190208//EN" "http://jats.nlm.nih.gov/publishing/1.2/JATS-journalpublishing1.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="other" dtd-version="1.2" xml:lang="en">
    <front>
        <journal-meta>
            <journal-id journal-id-type="pmc">F1000Research</journal-id>
            <journal-title-group>
                <journal-title>F1000Research</journal-title>
            </journal-title-group>
            <issn pub-type="epub">2046-1402</issn>
            <publisher>
                <publisher-name>F1000 Research Limited</publisher-name>
                <publisher-loc>London, UK</publisher-loc>
            </publisher>
        </journal-meta>
        <article-meta>
            <article-id pub-id-type="doi">10.12688/f1000research.157160.1</article-id>
            <article-categories>
                <subj-group subj-group-type="heading">
                    <subject>Software Tool Article</subject>
                </subj-group>
                <subj-group>
                    <subject>Articles</subject>
                </subj-group>
            </article-categories>
            <title-group>
                <article-title>BEpipeR: a user-friendly, flexible, and scalable data synthesis pipeline for the Biodiversity Exploratories and other research consortia</article-title>
                <fn-group content-type="pub-status">
                    <fn>
                        <p>[version 1; peer review: 1 approved with reservations, 1 not approved]</p>
                    </fn>
                </fn-group>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author" corresp="yes">
                    <name>
                        <surname>Gl&#x00fc;ck</surname>
                        <given-names>Marcel</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Conceptualization</role>
                    <role content-type="http://credit.niso.org/">Data Curation</role>
                    <role content-type="http://credit.niso.org/">Formal Analysis</role>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <role content-type="http://credit.niso.org/">Project Administration</role>
                    <role content-type="http://credit.niso.org/">Software</role>
                    <role content-type="http://credit.niso.org/">Validation</role>
                    <role content-type="http://credit.niso.org/">Visualization</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Original Draft Preparation</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <uri content-type="orcid">https://orcid.org/0000-0002-9027-6750</uri>
                    <xref ref-type="corresp" rid="c1">a</xref>
                    <xref ref-type="aff" rid="a1">1</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Bossdorf</surname>
                        <given-names>Oliver</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Funding Acquisition</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a2">2</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Thomassen</surname>
                        <given-names>Henri A.</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Funding Acquisition</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a1">1</xref>
                </contrib>
                <aff id="a1">
                    <label>1</label>Institute of Evolution and Ecology, Comparative Zoology, T&#x00fc;bingen University, T&#x00fc;bingen, Germany</aff>
                <aff id="a2">
                    <label>2</label>Institute of Evolution and Ecology, Plant Evolutionary Ecology, T&#x00fc;bingen University, T&#x00fc;bingen, Germany</aff>
            </contrib-group>
            <author-notes>
                <corresp id="c1">
                    <label>a</label>
                    <email xlink:href="mailto:marcel.glueck@uni-tuebingen.de">marcel.glueck@uni-tuebingen.de</email>
                </corresp>
                <fn fn-type="conflict">
                    <p>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>24</day>
                <month>10</month>
                <year>2024</year>
            </pub-date>
            <pub-date pub-type="collection">
                <year>2024</year>
            </pub-date>
            <volume>13</volume>
            <elocation-id>1268</elocation-id>
            <history>
                <date date-type="accepted">
                    <day>14</day>
                    <month>10</month>
                    <year>2024</year>
                </date>
            </history>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2024 Gl&#x00fc;ck M et al.</copyright-statement>
                <copyright-year>2024</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access article distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <self-uri content-type="pdf" xlink:href="https://f1000research.com/articles/13-1268/pdf"/>
            <abstract>
                <sec>
                    <title>Background</title>
                    <p>Large research consortia can generate tremendous amounts of biological information, including high-resolution soil, vegetation, and climate data. While this knowledge stock holds invaluable potential for answering evolutionary and ecological questions, making these data exploitable for modelling remains a daunting task due to the many processing steps required for synthesis. This might lead many researchers to fall back on a handful of ready-to-use data sets, potentially at the expense of statistical power and scientific rigour. In a push for a more stringent approach, we introduce BEpipeR, an R pipeline that allows for the streamlined synthesis of plot-based Biodiversity Exploratories data.</p>
                </sec>
                <sec>
                    <title>Methods</title>
                    <p>BEpipeR was designed with flexibility and ease of use in mind. For instance, users simply choose between aggregating forest or grassland data, or a combination thereof, effectively allowing them to process any experimental plot data of this research consortium. Additionally, instead of coding, they parse most processing information in a user-friendly way through parameter sheets. Processing includes, among others, the creation of a spatially explicit plot-ID template, data wrangling, quality control, plot-wise aggregations, the calculation of derived metrics, data joining to a large composite data set, and metadata compilation.</p>
                </sec>
                <sec>
                    <title>Results</title>
                    <p>With BEpipeR, we provide a feature-rich pipeline that allows users to process Biodiversity Exploratories data in a flexible and reproducible way. This pipeline might serve as a starting point for aggregating the numerous data sets of this and potentially similar research consortia. In this way, it might be a primer for the construction of consortia-wide composite data sets that take full advantage of the consortia&#x2019;s rich information stocks, ultimately boosting the visibility and participation of individual research projects.</p>
                </sec>
                <sec>
                    <title>Conclusions</title>
                    <p>The 
                        <ext-link ext-link-type="uri" xlink:href="https://github.com/marcelglueck/BEpipeR">BEpipeR</ext-link> pipeline permits the user-friendly processing and plot-wise aggregation of Biodiversity Exploratories data. With modifications, this framework may be easily adopted by other research consortia.</p>
                </sec>
            </abstract>
            <kwd-group kwd-group-type="author">
                <kwd>Research consortia</kwd>
                <kwd>large-scale long-term environmental research</kwd>
                <kwd>environmental data</kwd>
                <kwd>data democratization and utilization</kwd>
                <kwd>reproducibility</kwd>
                <kwd>R programming language</kwd>
                <kwd>Biodiversity Exploratories</kwd>
                <kwd>BExIS</kwd>
            </kwd-group>
            <funding-group>
                <award-group id="fund-1" xlink:href="http://dx.doi.org/10.13039/501100001659">
                    <funding-source>Deutsche Forschungsgemeinschaft</funding-source>
                    <award-id>433025806</award-id>
                </award-group>
                <funding-statement>German Research Foundation (DFG): 433025806, awarded to HAT and OB. </funding-statement>
                <funding-statement>
                    <italic>The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript.</italic>
                </funding-statement>
            </funding-group>
        </article-meta>
    </front>
    <body>
        <sec id="sec5" sec-type="intro">
            <title>Introduction</title>
            <p>Large-scale long-term environmental research frameworks such as LTER (
                <xref ref-type="bibr" rid="ref25">Hobbie et al. 2003</xref>), TEAM (
                <xref ref-type="bibr" rid="ref39">Rovero and Ahumada 2017</xref>), ForestGEO (
                <xref ref-type="bibr" rid="ref3">Anderson-Teixeira et al. 2015</xref>, 
                <xref ref-type="bibr" rid="ref16">Davies et al. 2021</xref>), and the Biodiversity Exploratories (
                <xref ref-type="bibr" rid="ref20">Fischer et al. 2010a</xref>, 
                <xref ref-type="bibr" rid="ref21">2010b</xref>) are at the forefront of functional biodiversity research. These frameworks are fuelled by well-orchestrated infrastructure projects, unmatched standing scientific expertise, and high-resolution time-series data. This combination of factors allows them to answer some of the most intricate and pressing ecological questions of our time with high statistical power. For instance, they shed light on how land-use shapes biodiversity and ecosystem processes (
                <xref ref-type="bibr" rid="ref2">Allan et al. 2015</xref>, 
                <xref ref-type="bibr" rid="ref18">Felipe-Lucia et al. 2020</xref>, 
                <xref ref-type="bibr" rid="ref27">Le Provost et al. 2023</xref>), how this gives rise to profound changes in community composition and network interactions (
                <xref ref-type="bibr" rid="ref50">Weiner et al. 2014</xref>, 
                <xref ref-type="bibr" rid="ref47">V&#x00e1;lyi et al. 2015</xref>, 
                <xref ref-type="bibr" rid="ref8">Bl&#x00fc;thgen et al. 2016</xref>, 
                <xref ref-type="bibr" rid="ref13">Chavarria et al. 2021</xref>), and the importance of temporal and spatial heterogeneity in shaping these patterns (
                <xref ref-type="bibr" rid="ref26">Kloss et al. 2011</xref>, 
                <xref ref-type="bibr" rid="ref1">Allan et al. 2014</xref>, 
                <xref ref-type="bibr" rid="ref43">Seibold et al. 2019</xref>, 
                <xref ref-type="bibr" rid="ref48">van Breugel et al. 2019</xref>).</p>
            <p>Due to their size and the presence of dedicated infrastructure projects, these frameworks continue to benefit from an ever-increasing stock of biological data. For instance, as of 19/03/2024, the Biodiversity Exploratories Information System (BExIS, 
                <xref ref-type="bibr" rid="ref12">Chamanara et al. 2021</xref>) featured more than 1500 data sets for their experimental forest and grassland plots (EPs). Arguably, while this wealth of information holds great promise for answering even highly intricate research questions, considerable effort is needed to combine these data in a way that allows for their straightforward use. While for a limited number of data sets and at the expense of reproducibility, such processing might be performed in spreadsheet editors such as LibreOffice Calc or Microsoft Excel, this approach becomes increasingly infeasible when more data are incorporated, ultimately asking for a more efficient way of processing. While this often means using programming languages such as R, Python, and Julia, not all ecologists might be used to these languages and learning one can be perceived as daunting (
                <xref ref-type="bibr" rid="ref5">Baker 2017</xref>, 
                <xref ref-type="bibr" rid="ref15">Custer et al. 2021</xref>).</p>
            <p>Unsurprisingly, to circumvent these challenges, many research projects within these consortia might rely on a handful of data sets that allow for a straightforward and less time-consuming incorporation into their workflows. By doing so, they might leave out potential data that would have been instrumental in answering their complex scientific questions, ultimately causing a loss in statistical power. Instead, a more compelling approach would be a tool that allows for a user-friendly processing of data sets, rendering the decision between progressing fast or incorporating many data obsolete. To this end, we introduce 
                <ext-link ext-link-type="uri" xlink:href="https://github.com/marcelglueck/BEpipeR">BEpipeR</ext-link>, an R pipeline that allows for the synthesis of EP-level (a)biotic Biodiversity Exploratories data. To maximise its usability and ease of implementation, we purposely limited the amount of coding required. For instance, we allow users to parse most aggregation information through csv files and toggle easily between three aggregation modes (forest, grassland, or combined) that allow for the straightforward processing of data provided by this research framework.</p>
            <p>Regardless of the mode selected, BEpipeR performs the following processing: creation of a spatially explicit plot-ID template, data substitution through exact and pattern-based approaches, subsettings, resolving species aggregates issues through fallbacks, data reshaping, variables standardization, mean and median-based outlier detection, data aggregation both within and across data sets, processing and aggregation of climate data generated and extensively pre-processed by TubeDB (
                <xref ref-type="bibr" rid="ref54">W&#x00f6;llauer et al. 2021</xref>; in the following referred to as &#x201c;BExIS&#x2019; climate tool&#x201d;), normalization by repeated rarefaction, calculating alpha diversity indices, data joining to template, quality control, variables selection by variance inflation factor analyses, and the compilation of metadata from JSON metadata files. Arguably, BEpipeR has the potential to generate large composite data sets in a highly reproducible fashion (
                <xref ref-type="bibr" rid="ref4">Baker 2016</xref>). As this might aid the democratization and utilization of available research data, we hope for this pipeline to become a focal point for compiling the vast amount of environmental information generated by the Biodiversity Exploratories and, potentially, similar research consortia.</p>
        </sec>
        <sec id="sec6" sec-type="methods">
            <title>Methods</title>
            <sec id="sec7">
                <title>Implementation</title>
                <p>BEpipeR is written in R v.4.1.1 (
                    <xref ref-type="bibr" rid="ref37">R Core Team 2021</xref>) and harnesses 
                    <italic toggle="yes">renv</italic> v.1.0.3 (
                    <xref ref-type="bibr" rid="ref46">Ushey and Wickham 2023</xref>) to establish an R project-based reproducible environment. This means that in setting-up the pipeline, all packages that were used to create the pipeline in the first place are automatically installed to a per-project library. These packages include 
                    <italic toggle="yes">here</italic> v.1.0.1 (
                    <xref ref-type="bibr" rid="ref34">M&#x00fc;ller 2020</xref>) for a streamlined file and directory referencing, 
                    <italic toggle="yes">terra</italic> v.1.7-18 (
                    <xref ref-type="bibr" rid="ref24">Hijmans et al. 2022</xref>) for spatial processing, 
                    <italic toggle="yes">data.table</italic> v.1.14.8 (
                    <xref ref-type="bibr" rid="ref6">Barrett et al. 2023</xref>), 
                    <italic toggle="yes">plyr</italic> v.1.8.8 (
                    <xref ref-type="bibr" rid="ref52">Wickham 2011</xref>), 
                    <italic toggle="yes">Hmisc</italic> v.5.1-1 (
                    <xref ref-type="bibr" rid="ref23">Harrell 2023</xref>), 
                    <italic toggle="yes">tidyverse</italic> v.2.0.0 (
                    <xref ref-type="bibr" rid="ref53">Wickham et al. 2019</xref>), and 
                    <italic toggle="yes">doSNOW</italic> v.1.0.20 (
                    <xref ref-type="bibr" rid="ref33">Microsoft Corporation and Weston 2022</xref>) for general processing, respectively, 
                    <italic toggle="yes">rtk</italic> v.0.2.6.1 (
                    <xref ref-type="bibr" rid="ref40">Saary et al. 2017</xref>) for rarefaction, 
                    <italic toggle="yes">vegan</italic> v.2.6-4 (
                    <xref ref-type="bibr" rid="ref17">Dixon 2003</xref>) for calculating diversity indices, 
                    <italic toggle="yes">usdm</italic> v.2.1-6 (
                    <xref ref-type="bibr" rid="ref35">Naimi et al. 2014</xref>) for variables selection, and 
                    <italic toggle="yes">jsonlite</italic> v.1.8.4 (
                    <xref ref-type="bibr" rid="ref36">Ooms 2014</xref>) for metadata extraction.</p>
                <p>For set-up, we assume the use of the 
                    <ext-link ext-link-type="uri" xlink:href="https://posit.co/download/rstudio-desktop/">RStudio</ext-link> integrated development environment (IDE) (
                    <xref ref-type="bibr" rid="ref38">Racine 2012</xref>) and a connection to the internet. First, upon downloading the desired release from 
                    <ext-link ext-link-type="uri" xlink:href="https://github.com/marcelglueck/BEpipeR">GitHub</ext-link>, the user unzips the compressed pipeline file. Second, the user obtains information on the R version required for running the pipeline by inspecting the top lines of the renv.lock file, placed at the root of BEpipeR&#x2019;s directory structure. If the required version is not available on their system, they obtain it from the 
                    <ext-link ext-link-type="uri" xlink:href="https://cran.r-project.org/">Comprehensive R Archive Network</ext-link> and install it. Additionally, on Windows, they ensure that a compatible version of 
                    <ext-link ext-link-type="uri" xlink:href="https://cran.r-project.org/bin/windows/Rtools/">RTools</ext-link> is installed. Third, the user sets the required R version as the default version in RStudio and exits the IDE. Last, BEpipeR&#x2019;s reproducible environment can be unfolded by opening the BEpipeR.Rproj file using RStudio, upon which the 
                    <italic toggle="yes">renv</italic> package is bootstrapped and all required packages can be installed to the per-project library by typing &#x2018;renv::restore()&#x2019; and confirming the prompted dialog with &#x2018;y&#x2019;. Subsequently, users may want to increase the number of lines retained in RStudio&#x2019;s console to ensure that all messages generated in running the pipeline are available for post-run inspection. Notably, for visualizing plot locations, the border of Germany must be obtained manually from 
                    <ext-link ext-link-type="uri" xlink:href="https://geodata.ucdavis.edu/gadm/gadm4.1/gpkg/gadm41_DEU.gpkg">GADM</ext-link> and stored for its use by the pipeline as &#x2018;Germany_borders.gpkg&#x2019; in the pipeline&#x2019;s &#x2018;Helpers&#x2019; directory. For up-to-date set-up instructions, users are referred to the pipeline&#x2019;s GitHub presence.</p>
            </sec>
            <sec id="sec8">
                <title>Operation</title>
                <p>
                    <underline>Parsing information</underline>
                </p>
                <p>With few exceptions (see below), BEpipeR&#x2019;s flow of operations (
                    <xref ref-type="fig" rid="f1">Figure 1</xref>) is controlled through three csv files (paramMAIN, paramDW, and paramSUB) that are used to parse processing information in a user-friendly fashion. Of these three, paramMAIN is the most instrumental and holds the majority of the aggregation information, whereas paramDW and paramSUB are helper files that coordinate the data wrangling (DW) and subsetting (SUB) steps, respectively. As the Excel versions of these files support users in providing processing information through conditional formatting and functions, we suggest that users provide processing information to these versions first, followed by exporting them to csv file format. While we purposely minimized user interventions to the pipeline&#x2019;s code, they could not be avoided completely. Currently, user actions might be required at five points (
                    <xref ref-type="table" rid="T1">Table 1</xref>) marked with the comment string &#x201c;ACTION POTENTIALLY REQUIRED&#x201d; in the R script. Users are advised to familiarize themselves with these interventions before executing the pipeline productively.</p>
                <fig fig-type="figure" id="f1" orientation="portrait" position="float">
                    <label>Figure 1. </label>
                    <caption>
                        <title>Overview of the BEpipeR pipeline.</title>
                        <p>Included are the parameter files used in its operation (left-hand side), its major processing steps (centre), and their sub-steps (right-hand side; italic: optional). Deploying the reproducible environment in setting-up the pipeline is only performed once, and hence greyed out.</p>
                    </caption>
                    <graphic id="gr1" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/172574/f22a9d64-130c-4b7c-9552-f1764c8495f6_figure1.gif"/>
                </fig>
                <table-wrap id="T1" orientation="portrait" position="float">
                    <label>Table 1. </label>
                    <caption>
                        <title>Information on running parameters to be parsed directly to BEpipeR.</title>
                        <p>The following parameters cannot be parsed through BEpipeR's parameter files. Instead, they must be provided directly to the pipeline's source code at locations marked with the string &#x201c;ACTION POTENTIALLY REQUIRED&#x201d;.</p>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">Variable name</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Expected input</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Input class</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Default value</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Function</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">BEpipeR_mode</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Either &#x201c;forest&#x201d;, &#x201c;grassland&#x201d;, or &#x201c;combined&#x201d; for aggregating forest or grassland data, or a combination thereof, respectively.</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">string</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">&#x201c;combined&#x201d;</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Specifies the mode for processing input data.</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">CLIM_min_years</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">An integer without leading zeros.</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">integer</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">4</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">The minimal number of years a variable must have data for to be retained in the climate data set (ID: 19007).</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">DI_reshape_whitelist</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Quoted data set IDs without version information (i.e., base IDs); separated by commas.</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">string</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">empty</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Prevents the reshaping from long to wide format for the data sets specified.</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">FQC_plots_to_remove</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Quoted EP plot designations with leading zeros; separated by commas.</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">string</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">empty</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Allows for the exclusion of plots whose inclusion would result in the discarding of many or all variables when variables with any NA (Not Available) values are excluded.</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">VS_protected_variables</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Quoted full variable names as provided in the FQC (i.e., quality-controlled) composite data set; separated by commas.</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">string</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">empty</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Allows for the protection of variables from being excluded through stepwise variance inflation factor analysis.</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
                <p>To demonstrate BEpipeR&#x2019;s flow of operations and guide users in interpreting the pipeline&#x2019;s output, we distribute BEpipeR in a &#x2018;just-ran&#x2019; state. This means that the pipeline comes with both exemplary input data and the results produced by processing these files (see Use Cases). As indicated (
                    <xref ref-type="table" rid="T2">Table 2</xref>), exemplary input files must be replaced with real-world Biodiversity Exploratories data when using the pipeline productively to ensure the correctness of results.</p>
                <table-wrap id="T2" orientation="portrait" position="float">
                    <label>Table 2. </label>
                    <caption>
                        <title>BEpipeR&#x2019;s core directories, their expected/generated main content, and processing-related information.</title>
                        <p>&#x2018;Provisioning&#x2019; describes whether the content is generated automatically or must be provided by the user. Placeholders are surrounded by square brackets. BExIS: Biodiversity Exploratories Information System.</p>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">Directory</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">(Expected) content and processing information</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Provisioning</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Helpers</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">The main parameter files (paramMAIN), data wrangling (paramDW) and subsetting (paramSUB) helpers; the data set used for constructing the spatially explicit plot IDs template (currently data set 20826_7, dummy data provided must be replaced with real Biodiversity Exploratories data); a GeoPackage file with the border of Germany for visualizing plot locations (see Implementation section for more information).</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Manual</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Metadata</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Metadata files to all data sets flagged as included in paramMAIN. For data not obtained through BExIS&#x2019; climate tool, this is the corresponding &#x2018;[baseID]_[version]_datastructure.txt&#x2019; file. For climate data obtained through BExIS&#x2019; climate tool, this equates to its sensor description csv file renamed to match the scheme &#x2018;[baseID]_[version]_sensor_description.csv&#x2019;.</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Manual</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Output</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Files generated by BEpipeR throughout its execution (see 
                                    <xref ref-type="table" rid="T3">Table 3</xref>). This directory is expunged at the start of each pipeline run.</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Automatic</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Processing</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Files copied here from &#x2018;Source&#x2019; for processing through BEpipeR. This directory is expunged at the start of each pipeline run.</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Automatic</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">R_scripts</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">The R programming language script of the BEpipeR pipeline.</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Automatic</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">renv</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Files required for setting-up and maintaining the pipeline&#x2019;s reproducible environment.</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Automatic</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Source</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">All data sets in csv file format to be processed by BEpipeR following their automatic transfer to the &#x2018;Processing&#x2019; directory. Naming scheme: &#x2018;[baseID]_[version]_data.csv&#x2019;.</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Manual</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Temp</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Temporary files written by the 
                                    <italic toggle="yes">rtk</italic> R package in performing repeated rarefaction. This directory is created by BEpipeR.</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Automatic</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
                <p>We support users in providing the processing information required with dictionaries to all three param files that can be found as additional sheets in their respective Excel files. We recommend that users consult this information before starting to work with the pipeline. For exhaustive information on data processing, users are referred to the R script itself, which features comments on the reasoning for each step performed as well as further background information throughout. Additionally, in the following, we provide an in-depth description of the workflow, including details on how to encode the information required, the processing performed, and the output generated, to guide users in familiarizing themselves with the pipeline. Potential abbreviations provided in parentheses after the names of processing steps refer to the prefixes of variable names used in the BEpipeR R script.</p>
                <p>
                    <underline>Data retrieval, exploration, and curation</underline>
                </p>
                <p>Data retrieval from the BExIS database, as well as data exploration and curation, is not performed by BEpipeR. Instead, these tasks are best performed by the user on a dataset-by-dataset basis. This approach acknowledges that the decisions on incorporation and processing depend on the user&#x2019;s aims as well as the unique combination of data and metadata. It further allows users to harness existing workflows for inspecting tabular data. Upon examining both data and metadata, the user decides whether the data set at hand should be processed by BEpipeR, and if yes, provides all the information required for its processing to paramMAIN and, if applicable, paramDW and paramSUB (see below). Subsequently, they copy the respective data set in csv file format (named &#x2018;[baseID]_[version]_data.csv&#x2019;; square brackets denote placeholders) to BEpipeR&#x2019;s &#x2018;Source&#x2019;, as well as its &#x2018;[baseID]_[version]_datastructure.txt&#x2019; file to the &#x2018;Metadata&#x2019; directory.</p>
                <p>
                    <underline>Setting-up</underline>
                </p>
                <p>Following the successful deployment of the reproducible environment through 
                    <italic toggle="yes">renv</italic> and before executing the pipeline, the user decides on one of three possible processing modes: i) forest, for aggregating the Biodiversity Exploratories&#x2019; forest data, ii) grassland, for grassland data, or iii) combined, for aggregating both forest and grassland data at the same time. They then provide the corresponding string to the BEpipeR_mode variable in the pipeline&#x2019;s source code (
                    <xref ref-type="table" rid="T1">Table 1</xref>). Notably, both &#x2018;Processing&#x2019; and &#x2018;Output&#x2019; directories are expunged immediately after the start of each pipeline run to prevent potentially outdated files from being mistaken for up-to-date ones. Subsequently, the &#x2018;Processing&#x2019; directory is populated by copying all csv data sets from the &#x2018;Source&#x2019; directory to this folder. Please note, as data sets are retrieved by their base IDs, BEpipeR does not allow multiple data sets with the same base ID to be present in the &#x2018;Processing&#x2019; directory. If this issue is detected, the user is informed and asked to resolve it.</p>
                <p>
                    <underline>Data pre-processing</underline>
                    <list list-type="order">
                        <list-item>
                            <label>1</label>
                            <p>
                                <bold>Template creation:</bold> Combining Biodiversity Exploratories data is complicated by two factors. First, most data sets are not spatially explicit per se, meaning they do not feature location information that allows for a straightforward calculation of inter-plot distances. Second, plot information is not provided in a harmonized fashion. This means that the column holding plot designations might be arbitrarily named (e.g., EP, EpPlotID, EP_plot_ID, Useful_EP_PlotID, Plot, PlotID, Plot ID, Plot_ID, or plotid_withzero) and located within the data set. This is further complicated by the presence or absence of leading zeros in plot numbers (e.g., AEW1 vs. AEW01), and two alternative plot encoding schemes (e.g., AEW01 in EP is A18422 in grid plot (GP) encoding). To allow for joining the data regardless of encoding and to maximise its downstream usability, the plot IDs template constructed by BEpipeR holds EP as well as GP designations with and without leading zeros, respectively.</p>
                            <p>To allow users to seamlessly use the data generated by BEpipeR in downstream spatially explicit statistical frameworks, the pipeline enriches the template with plot location information harmonized to the World Geodetic System 1984 (WGS84, EPSG: 4326) and informs the user about the spatial imprecision introduced by reprojecting location data from DHDN (Deutsches Hauptdreiecksnetz) to this unified coordinate reference system. Subsequently, the csv version of paramMAIN is imported to the R session as &#x2018;datasets_table&#x2019; and filtered for instrumental columns and data sets flagged for inclusion. This table outlines the processing to be performed on each data set and is updated after each major processing step to reflect the progress of the pipeline. Template creation is concluded by various quality checks, including warnings if &#x2018;datasets_table&#x2019; features data sets not found in the &#x2018;Processing&#x2019; directory, or the system-wide memory available might not be sufficient for executing the pipeline (see Minimal system requirements).</p>
                        </list-item>
                        <list-item>
                            <label>2</label>
                            <p>
                                <bold>Data wrangling (DW):</bold> Removing or replacing factually incorrect values is essential in pre-processing data. BEpipeR supports users therein by allowing them to replace or remove these values through exact and pattern-based approaches. To enable this option, the user sets rDW in paramMAIN for the respective data set to &#x2018;yes&#x2019;. Subsequently, they provide additional information to paramDW, the helper file for this operation. This information includes the data set&#x2019;s base ID (Dataset_ID), whether the replacement is value- (Class = value) or pattern- (Class = pattern) based, the value to replace (Value_old), and the value to replace with (Value_new). Noteworthy, the pattern-based approach even allows for the deletion of matching rows by specifying Value_new as NULL. In contrast, value-based row deletions are best performed by subsetting (see next step). Generally, modifications are applied in the order of listing (i.e., from top to bottom). This means that multiple modifications can be applied to the same data set by listing the same base ID in multiple rows of paramDW, each time with a different modification.</p>
                        </list-item>
                        <list-item>
                            <label>3</label>
                            <p>
                                <bold>Subsetting (SUB):</bold> BEpipeR&#x2019;s data wrangling capabilities are enriched by its subsetting function, which allows for the filtering of rows using exact matches. To achieve this, upon setting rSUB for the respective data set in paramMAIN to &#x2018;yes&#x2019;, users provide the following information to this operation&#x2019;s helper file, paramSUB: the data set&#x2019;s base ID (Dataset_ID), the name of the column to perform the subsetting on (Subset_variable), the comparison operator to use (Operator), and the entry to retain or remove (Subset_level). As for DW, multiple modifications that are applied consecutively from top to bottom can be requested for the same data set.</p>
                        </list-item>
                        <list-item>
                            <label>4</label>
                            <p>
                                <bold>Fallbacks (FB):</bold> Data sets that establish relationships between taxonomic entities and their abundances often feature taxonomic levels not resolved to completion. For instance, a tree species data set might feature aggregated species, such as 
                                <italic toggle="yes">Quercus spec.</italic>, alongside species that were fully resolved. To remedy this issue, BEpipeR allows users to perform fallbacks to more basal (taxonomic) levels. To invoke this operation, users set rFB in paramMAIN to &#x2018;yes&#x2019; in addition to providing the following information: the name of the column to perform the fallback on (FBcol), the separator used to delineate the different levels of information in FBcol (FBsep), and FBsub, the index of the substring of interest. BEpipeR uses this information to string-split the information in FBcol at the separator specified, upon which the substring of interest is retained by its index. Subsequently, BEpipeR sums abundances per plot at the newly generated factor level, effectively collapsing abundances at a more basal (taxonomic) level. Abundance scores harmonized in this way may seamlessly be used in downstream processing steps.</p>
                        </list-item>
                        <list-item>
                            <label>5</label>
                            <p>
                                <bold>Reshaping (RES):</bold> Usually, plot data are most easily processed in wide format, with rows denoting plots and columns representing the respective variables. In this step, BEpipeR allows users to cast data to this format while coding absent combinations as Not Available (NA). To flag a data set for reshaping, the user sets rRES in paramMAIN to &#x2018;yes&#x2019;, upon which they supply the factors column that will be used in constructing new column names to RESvar. Please note: i) Factors not used in reshaping are collapsed by calculating plot-wise means. Hence, the resulting data set features unique plots in the first column, followed by column-wise environmental data. ii) Because missing combinations are coded as NA, RES is mutually exclusive with calculating alpha diversity indices (DI, see below). Hence, if you would like to calculate these indices, keep the data in long format. Do not reshape climate data obtained through BExIS&#x2019; climate tool to be processed in CLIM (see below) either.</p>
                        </list-item>
                        <list-item>
                            <label>6</label>
                            <p>
                                <bold>Standardization by variable (STD):</bold> Abundance scores often rely on sampling effort, with differential effort potentially giving rise to differential abundance, preventing meaningful plot-based comparisons. While various data-dependent normalization/standardization approaches exist (e.g., 
                                <xref ref-type="bibr" rid="ref51">Weiss et al. 2017</xref>, 
                                <xref ref-type="bibr" rid="ref28">Lin and Peddada 2020</xref>, 
                                <xref ref-type="bibr" rid="ref55">Xia 2023</xref>), data sets that feature information on sampling effort are best normalized using this information. To achieve this, BEpipeR allows all numeric variables of a data set to be normalized by information provided in a user-specified column of that data set. To do so, users set rSTD for the respective data set to &#x2018;yes&#x2019; and provide the variable&#x2019;s name to be used for data set-wide standardization (STDvar) to paramMAIN. The result is standardized variables that permit a meaningful and straightforward integration in downstream processing steps.</p>
                        </list-item>
                    </list>
                </p>
                <p>
                    <underline>Quality checks</underline>
                </p>
                <p>
                    <bold>Multi-mode outlier detection (QC):</bold> To support users in their data exploration and to spot potentially invalid values, such as undisclosed numeric NA values or species aggregates that result in artificially high or low abundance scores, BEpipeR performs column-wise outlier detection. This is done upon setting QC in paramMAIN to &#x2018;yes&#x2019;. To avoid false alarms in non-combined mode (i.e., forest or grassland), plots not conforming to the desired ecosystem are excluded in the detection procedure. In combined mode, outlier detection is performed separately for each ecosystem. Currently, BEpipeR features two outlier detection approaches that are based on constructing confidence intervals as multiples of standard deviation (sd, default: 18) around column means and medians. Notably, lower confidence interval bounds are adjusted according to their column means/medians. More specifically, for a given column, if the lower confidence interval bound is negative but the corresponding column mean/median is positive, the respective lower bound is adjusted to zero. This approach acknowledges that many environmental data might be positive and increases the detection sensitivity of BEpipeR at the lower end of the data distribution. Flagged data sets, filtered for columns with potential outliers, are exported to the global R environment for inspection with the naming scheme &#x2018;QC_[baseID]_flagged_[MEAN|MEDIAN]_[forest|grassland]&#x2019;. If justified, outliers may be removed by the user through DW, SUB, or a combination thereof. Note that, by default, BEpipeR will not perform QC on climate data obtained through BExIS&#x2019; climate tool (base ID: 19007), regardless of what users set QC for this data set to. This is because these data have already been extensively quality-checked; hence, any outliers found are most likely false positives. 
Future releases of BEpipeR will enhance the outlined detection approaches by utilizing data structure information exclusively obtained through BExIS&#x2019; API.</p>
                <p>
                    <underline>Data aggregation</underline>
                    <list list-type="order">
                        <list-item>
                            <label>1</label>
                            <p>
                                <bold>Dataset-intern aggregation (DIA):</bold> The aim of this step is to construct data-set and plot-level-wise aggregation metrics, regardless of whether data is provided in long or wide format. To accommodate both data structures, BEpipeR provides two aggregation approaches of which one must be provided to the DIAappr column in paramMAIN upon setting DIA for the respective data set to &#x2018;yes&#x2019;. For either aggregation approach and the grouping variables provided, BEpipeR computes mean, median, sd, and median absolute deviation (mad) values.</p>
                            <p>DIAappr = 2 allows users to aggregate data in long format. Currently, up to three grouping variables (plot IDs + two non-plot variables) are supported and might be provided to paramMAIN&#x2019;s DIAcol1, DIAcol2, and DIAcol3 columns.</p>
                            <p>DIAappr = 3 permits the plot-wise aggregation of data in wide format, meaning only plot IDs as grouping variable are currently supported (i.e., both DIAcol2 and DIAcol3 must be kept empty).</p>
                        </list-item>
                        <list-item>
                            <label>2</label>
                            <p>
                                <bold>Group-intern aggregation (GIA):</bold> This step allows users to combine multi-measurement (e.g., multi-year) data split across multiple data sets with the subsequent calculation of summary statistics (mean, median, sd, and mad), while maintaining up to three grouping variables (plot IDs + two non-plot variables). This processing is invoked by setting GIA in paramMAIN to &#x2018;yes&#x2019;, followed by providing grouping variables to the GIAcol1, GIAcol2, and GIAcol3 columns. Notably, to reduce file sizes, amplicon sequencing data sets might have been shrunk by i) omitting plot &#x00d7; taxonomic unit combinations with zero abundance, and/or ii) deleting all-zero abundance taxonomic units, resulting in deliberately not covering all taxonomic units across all years. The first issue can be corrected for by enabling abundance correction (GIAabcorr = &#x2018;yes&#x2019;), which effectively rebuilds the plot &#x00d7; taxonomic unit matrix with missing combinations coded as zeros. The second issue is resolved by BEpipeR upon setting taxonomic units correction (GIAtaxcorr) to &#x2018;yes&#x2019;, which ensures that all taxonomic units are present across all years (absent units are introduced with all-zero abundance). Critically, in paramMAIN&#x2019;s Group column, users must assign a unique number to the data sets to be combined. In addition, for all focal variables, it is the user&#x2019;s responsibility to ensure that they are shared between the group&#x2019;s data sets and that their order of listing in paramMAIN is identical. Future releases of this pipeline will automate these steps by falling back to the group&#x2019;s shared variables, followed by their re-organization and processing.</p>
                        </list-item>
                        <list-item>
                            <label>3</label>
                            <p>
                                <bold>Climate (CLIM):</bold> BEpipeR&#x2019;s ability to process environmental data is enriched by its ability to process yearly climate aggregates obtained through BExIS&#x2019; climate tool (
                                <xref ref-type="bibr" rid="ref54">W&#x00f6;llauer et al. 2021</xref>). To obtain data processable by BEpipeR, users choose the following parameters in the web tool&#x2019;s graphical user interface for generating their data: Spatial aggregation: separate plots; Aggregation of time: year; options: &#x2018;write all plots in one CSV-File&#x2019;, &#x2018;one plot timeseries after another&#x2019;, &#x2018;write header in CSV-Files&#x2019;, &#x2018;include column &#x201c;plotID&#x201d;&#x2019;, Calendar columns: year. Additionally, they request the parameter description file to be included in the zip archive to be generated. We recommend that users set &#x2018;quality check of measured values&#x2019; to &#x2018;3: physical range + step + empirical check&#x2019; to obtain climate data that fulfils the highest quality standards. Users are free to choose whether they enable the interpolation of missing values. If they opt to do so, we advise that they request the inclusion of the &#x2018;qualitycounter&#x2019; column in their aggregated climate data, which provides information on the total and interpolated number of data points underlying each of the yearly climate aggregates. The presence of this column in the climate data is used as an indicator for BEpipeR to remove weakly supported data points (percentage interpolated &gt; 60%), a step that is skipped if this column is not found in the data. If interpolated information is provided by BExIS&#x2019; climate tool, users are advised to not re-arrange the data column-wise, as this will break the association between the &#x2018;qualitycounter&#x2019; and data columns. However, row-wise operations, such as the exclusion of undesired years through SUB, are permitted.</p>
                            <p>To calculate reliable multi-year summary statistics (mean, median, sd, mad, min, and max), BEpipeR allows users to exclude variables not based on a minimal number of data points (i.e., years). By default, this filter is set to four but may be adjusted by users interested in retaining variables that satisfy a more stringent filtering approach (CLIM_min_years, 
                                <xref ref-type="table" rid="T1">Table 1</xref>). These users should keep in mind that, depending on the years they want to obtain temporal coverage over, the replacement of plot HEW02 with HEW51 in 2016 might complicate or even negate the acquisition of long-term time-series climate data. To assist users in this filtering, BEpipeR issues a warning if their strategy is too stringent and results in retaining only few or no climate variables at all.</p>
                        </list-item>
                    </list>
                </p>
                <p>
                    <underline>Diversity calculations</underline>
                </p>
                <p>To go beyond a simple description of abundances, we allow users to calculate alpha diversity indices. To do so, they set DI for the respective data set in paramMAIN to &#x2018;yes&#x2019;. First, this triggers the reshaping of the respective data set to wide format with zero as value for combinations not present. This step may be skipped for data sets already in this format by providing their base ID to the DI_reshape_whitelist variable (
                    <xref ref-type="table" rid="T1">Table 1</xref>). Second, users might opt to normalize the data provided through rarefaction by setting RF in paramMAIN to &#x2018;yes&#x2019; and providing the number of repetitions to perform (as multiple of ten) to RFnrep. We are well aware of the ongoing debate on proper normalization and the alleged shortcomings of rarefaction (
                    <xref ref-type="bibr" rid="ref31">McMurdie and Holmes 2014</xref>, 
                    <xref ref-type="bibr" rid="ref41">Schloss 2024</xref>). We acknowledged this by purposely deciding on repeated rarefaction, as an extension of normal rarefaction, for the following reasons: i) rarefaction is a highly tractable and easy-to-grasp concept, ii) data normalized through rarefaction might seamlessly be used in calculating alpha diversity indices (e.g., 
                    <xref ref-type="bibr" rid="ref49">Walters and Martiny 2020</xref>, 
                    <xref ref-type="bibr" rid="ref41">Schloss 2024</xref>), iii) rarefaction might still be the most frequently used normalization technique for amplicon data, and implemented in many processing pipelines such as QIIME (
                    <xref ref-type="bibr" rid="ref11">Caporaso et al. 2010</xref>) and mothur (
                    <xref ref-type="bibr" rid="ref42">Schloss et al. 2009</xref>), and iv) rarefaction noticeably decreases the discrepancy between OTU and ASV data (
                    <xref ref-type="bibr" rid="ref49">Walters and Martiny 2020</xref>, 
                    <xref ref-type="bibr" rid="ref14">Chiarello et al. 2022</xref>), allowing for a higher degree of comparability regardless of the type of clustering applied. Most importantly, repeated rarefaction addresses the often-criticised data loss by random subsampling through performing these subsamplings repeatedly, effectively reducing the impact of single stochastic processes in normalizing data (
                    <xref ref-type="bibr" rid="ref10">Cameron et al. 2021</xref>). Noteworthy, before repeated rarefaction, potential decimal values in the abundance table that resulted from upstream multi-year aggregations are rounded up to the next integer, an approach that prevents positive values smaller than 0.5 from falsely being set to zero (i.e., absence). Users might gauge the success of the normalization by inspecting rarefaction curves and/or slopes exported to the &#x2018;Output&#x2019; directory (
                    <xref ref-type="table" rid="T3">Table 3</xref>). Following repeated rarefaction and the rounding of potentially produced decimal abundance scores to their nearest integer, BEpipeR computes alpha diversity indices, including species richness, Menhinick (
                    <xref ref-type="bibr" rid="ref32">Menhinick 1964</xref>), Margalef (
                    <xref ref-type="bibr" rid="ref29">Margalef 1973</xref>), Shannon-Wiener (
                    <xref ref-type="bibr" rid="ref44">Shannon and Weaver 1949</xref>), Simpson (
                    <xref ref-type="bibr" rid="ref45">Simpson 1949</xref>), and the inverse Simpson index. Because most alpha diversity indices are meaningless for empty sampling units, only plots with non-zero richness are retained for later joining.</p>
                <p>Importantly, some data sets may be incorporated into BEpipeR&#x2019;s workflow as they are (i.e., without the need for DIA, GIA, CLIM, RF, or DI). This must be signalled to BEpipeR by setting AsIt in paramMAIN to &#x2018;yes&#x2019;, upon which no aggregation is performed on these data sets. This functionality allows BEpipeR to incorporate highly sophisticated ready-to-use data sets in a straightforward fashion.</p>
                <p>
                    <underline>Post-processing</underline>
                    <list list-type="order">
                        <list-item>
                            <label>1</label>
                            <p>
                                <bold>Data joining (MRG):</bold> Upon ensuring that all data have been processed fully by inspecting the relevant datasets_table columns, BEpipeR left-joins all available data to the plot IDs template constructed upstream. For data sets with leftover grouping variables (apparent by the data set having more rows than the plot IDs template), BEpipeR attempts to accommodate these by repeated reshaping to wide format until the data set&#x2019;s number of rows conforms to the expectation, or no more potential grouping variables are found. In the latter case, the user is warned. In joining data, BEpipeR appends complete data set IDs to column headers to allow for a straightforward back-tracing of information to their origin.</p>
                        </list-item>
                        <list-item>
                            <label>2</label>
                            <p>
                                <bold>Quality checks (FQC):</bold> The aim of this processing step is two-fold. First, BEpipeR performs several quality checks to ensure data consistency and the successful execution of upstream processing steps. For instance, it warns if additional rows were introduced in left-joining, plot designations are found in the values matrix, or if duplicated column headers or headers without data set ID are found, and it maximises the data&#x2019;s downstream usability by replacing potential spaces in column names with underscores. Second, it removes undesired information from the composite data set constructed in MRG to prepare the data for variables selection (see below) or direct use. This is achieved by replacing NaN (Not a Number) and Inf (infinite) values with NA and excluding non-numeric columns. The resulting intermediate composite data set (FQC_env_var_composite_intermediate.csv, 
                                <xref ref-type="table" rid="T3">Table 3</xref>) might still contain NA cells and mono-value columns. However, it may already be of interest to users who apply statistical frameworks capable of tolerating such input data. This composite data set is processed further by excluding mono-value columns and plots not conforming to the BEpipeR mode specified. Additionally, as some plots might render obtaining a large complete-cases data set difficult by breaking up otherwise continuous long-term time-series data (e.g., HEW51, established in 2016), we allow users to exclude these plots (FQC_plots_to_remove, 
                                <xref ref-type="table" rid="T1">Table 1</xref>) before removing any columns with NA values. The resulting complete-cases composite data set is subsequently exported (FQC_env_var_composite_complete.csv, 
                                <xref ref-type="table" rid="T3">Table 3</xref>). For users applying statistical frameworks capable of processing multicollinear data, this file may already be used as input for their analyses.</p>
                        </list-item>
                        <list-item>
                            <label>3</label>
                            <p>
                                <bold>Variables selection (VS):</bold> Understanding the correlation structure underlying explanatory data is pivotal for the thoughtful and meaningful interpretation of statistical models. In this processing step, we support users in two ways: i) through BEpipeR, we provide information on correlations underlying the complete composite data set produced in FQC, and ii) condense the data to a set of less correlated variables. Insights into the correlation structure are gained by calculating Pearson correlation coefficients (r) and associated false discovery rate-corrected (
                                <xref ref-type="bibr" rid="ref7">Benjamini and Hochberg 1995</xref>) 
                                <italic toggle="yes">P</italic> values between all variable pairs in FQC_env_var_composite_complete.csv (
                                <xref ref-type="table" rid="T3">Table 3</xref>). This information is further used to warn users if significant (
                                <italic toggle="yes">P</italic> &lt; 0.05) pairwise comparisons with unusually high goodness of fit (r ~ 1) are observed, upon which the user decides whether these comparisons are justifiable or instead indicative of issues in upstream data processing.</p>
                            <p>Reducing the data set to a suite of less correlated variables is achieved by variables selection through variance inflation factor (VIF) analyses. Notably, users might often have justified 
                                <italic toggle="yes">a priori</italic> assumptions about focal variables, and hence would like to retain these in their data set for easier model interpretation. We acknowledge this and provide users with the ability to protect their focal variables from removal by supplying their names to the VS_protected_variables variable in the script (
                                <xref ref-type="table" rid="T1">Table 1</xref>). To maximise the downstream usability of the data generated, BEpipeR performs variables selection for a range of VIF thresholds from two to ten, with smaller values denoting a more stringent exclusion approach. For each VIF threshold, multiple files are exported to the &#x2018;Output&#x2019; directory (
                                <xref ref-type="table" rid="T3">Table 3</xref>).</p>
                        </list-item>
                        <list-item>
                            <label>4</label>
                            <p>
                                <bold>Metadata compilation and export (COMD):</bold> To allow for straightforward data re-use, the provisioning of metadata is a cumbersome yet necessary duty of all data scientists. The Biodiversity Exploratories provides these metadata in JSON &#x2018;datastructure&#x2019; files for normal data sets and in a csv file for data generated through BExIS&#x2019; climate tool. BEpipeR utilizes this information to generate metadata for variables featured in the complete composite data set. To do so, BEpipeR strips away data set IDs and aggregation suffixes from headers. Subsequently, for variable names isolated this way, their metadata (such as variable description and unit information) are extracted, enriched with information on the processing performed through BEpipeR, and exported as tabular data to the &#x2018;Output&#x2019; directory (
                                <xref ref-type="table" rid="T3">Table 3</xref>). Note that, as for data sets, metadata files are retrieved by their base ID, and hence, multiple metadata files with the same base ID are not supported in the &#x2018;Metadata&#x2019; directory.</p>
                        </list-item>
                    </list>
                </p>
                <table-wrap id="T3" orientation="portrait" position="float">
                    <label>Table 3. </label>
                    <caption>
                        <title>Information on the files generated by the BEpipeR pipeline and exported to the 'Output' directory.</title>
                        <p>Placeholders are in square brackets.</p>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">Processing step</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">File name</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Description</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Rarefaction (RF)</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">RF_[baseID]_rarefaction_curves_subsample_[subsampleSize].png</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Rarefaction curves depicting the relationship between subsample size on the x and richness on the y axis. The vertical line marks the subsample size used for rarefaction that is also provided in the file's name. Horizontal lines visualize plot-based richnesses following a single rarefaction. Generated with vegan's rarecurve() function.</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Rarefaction (RF)</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">RF_[baseID]_rarefaction_slopes_subsample_[subsampleSize].csv</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">The slopes of rarefaction curves constructed with vegan's rarecurve() function at the subsample size specified in the file's name. Generated with vegan's rareslope() function.</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Final quality control (FQC)</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">FQC_env_var_composite_intermediate.csv</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">The composite data set constructed by left-joining all data to the spatially explicit plot IDs template with the subsequent replacement of NaN (Not a Number) and Inf (infinite) values with NA (Not Available), as well as the exclusion of non-numeric columns. Metadata columns provide experimental (EP) and grid plot (GP) designations with (Plotid0) and without (Plotid) leading zeros, as well as location information in World Geodetic System 1984 (WGS84). This information is followed by the variables produced, with their headers carrying processing information and the full data set IDs they originate from.</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Final quality control (FQC)</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">FQC_env_var_composite_complete.csv</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">FQC_env_var_composite_intermediate.csv after excluding plots not in concordance with the BEpipeR mode specified, in addition to user-defined ones (
                                    <xref ref-type="table" rid="T1">Table 1</xref>). Mono-value columns and those with NAs have been excluded as well, making this data set a complete-cases one.</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Variables selection (VS)</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">VS_pearson_corrMat.csv</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">The Pearson's r matrix as produced by Hmisc's rcorr() function for all non-metadata variables in FQC_env_var_composite_complete.csv.</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Variables selection (VS)</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">VS_pearson_numbObs.csv</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">The number-of-observations matrix underlying the values in VS_pearson_corrMat.csv.</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Variables selection (VS)</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">VS_pearson_pVals.csv</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">The 
                                    <italic toggle="yes">P</italic> values matrix to the r values stored in VS_pearson_corrMat.csv.</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Variables selection (VS)</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">VS_corr_flat_complete.csv</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">A flattened representation of VS_pearson_corrMat.csv and VS_pearson_pVals.csv. Diagonal values as well as false discovery rate (FDR)-corrected 
                                    <italic toggle="yes">P</italic> values are provided.</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Variables selection (VS)</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">VS_VIF[VIFthreshold]_VS_analysed_vars.csv</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">The names of the variables that underwent variables selection by variance inflation factor (VIF) analysis with the VIF threshold specified. Typically, this is all non-metadata variables from FQC_env_var_composite_complete.csv.</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Variables selection (VS)</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">VS_VIF[VIFthreshold]_VS_excluded_vars.csv</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">The variables excluded by usdm's vifstep() function at the VIF threshold specified.</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Variables selection (VS)</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">VS_VIF[VIFthreshold]_VS_corr_matrix.csv</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">A Pearson's r matrix for the variables retained by vifstep() at the VIF threshold specified.</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Variables selection (VS)</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">VS_VIF[VIFthreshold]_VS_retained_vars_scores.csv</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">The VIF scores of variables retained at the VIF threshold specified.</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Variables selection (VS)</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">VS_VIF[VIFthreshold]_VS_composite.csv</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">FQC_env_var_composite_complete.csv after excluding variables listed in VS_VIF[VIFthreshold]_VS_excluded_vars.csv.</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Compiling metadata (COMD)</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">COMD_metadata_compiled.csv</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">The compiled metadata of non-metadata variables in FQC_env_var_composite_complete.csv. For each variable, the following information is provided:
                                    <break/>
                                    <p>
                                        <list list-type="bullet">
                                            <list-item>
                                                <label>-</label>
                                                <p>The data set the variable originates from (with (FullID) and without (BaseID) version suffix).</p>
                                            </list-item>
                                            <list-item>
                                                <label>-</label>
                                                <p>Its name as extracted from FQC_env_var_composite_complete.csv (Composite_var).</p>
                                            </list-item>
                                            <list-item>
                                                <label>-</label>
                                                <p>Its name after removing processing suffixes (Composite_var_trimmed), as well as the processing information extracted (Aggr_string_1, Aggr_string_2).</p>
                                            </list-item>
                                            <list-item>
                                                <label>-</label>
                                                <p>Its metadata as extracted from Biodiversity Exploratories JSON 'datastructure' files (Variables.Id, Variables.Label, Variables.Description, Variables.unit.Name, Variables.unit.Description, and Variables.dataType.Name).</p>
                                            </list-item>
                                            <list-item>
                                                <label>-</label>
                                                <p>The processing performed by BEpipeR (in separate columns: rSUB, rDW, rSTD, rRES, rFB, DIA, GIA, RF, and DI; in concatenated fashion: Proc_info). See paramMAIN's dictionary for more information.</p>
                                            </list-item>
                                        </list>
                                    </p>
                                </td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
                <p>
                    <underline>Minimal system requirements</underline>
                </p>
                <p>To facilitate the adoption of the pipeline, we designed BEpipeR to be executable even on entry-level consumer hardware. CPU-wise, BEpipeR should execute fine on machines with &#x2265; 2 physical cores. RAM-wise, its minimal requirements are primarily dictated by the size of the input data sets users opt to process, as well as the type of processing requested. For instance, input files &lt; 100 KB might consume negligible amounts of working memory, while large amplicon sequencing data sets (&gt; 200 MB) might require significantly more, in particular if they are rarefied with an excessively high number of repetitions. Still, to prevent working memory from becoming a limiting factor on typical consumer-level hardware, repeated rarefaction is performed in chunks of ten, temporary files might be written to disk (&#x2018;Temp&#x2019; directory, 
                    <xref ref-type="table" rid="T2">Table 2</xref>), and large elements are cleared from the pipeline&#x2019;s workspace immediately after they have become obsolete. With respect to processing times, BEpipeR might spend most of its time on the repeated rarefaction of large amplicon data sets, as well as performing variables selection on large composite data sets (&gt; 150 rows, &gt; 1000 columns). However, since these steps harness parallel processing, they can be sped up considerably by switching to more capable hardware.</p>
            </sec>
        </sec>
        <sec id="sec9">
            <title>Use cases</title>
            <p>To demonstrate BEpipeR&#x2019;s rich functionalities with minimal effort to the user, we ship the pipeline with exemplary data, including ten input data sets, corresponding metadata, and filled-out parameter files (
                <xref ref-type="table" rid="T2">Table 2</xref>). Both input and metadata files mimic real Biodiversity Exploratories information, which cannot be included for various reasons. In addition, the pipeline includes all files produced by processing the provided input data with default settings (
                <xref ref-type="table" rid="T1">Table 1</xref>) to allow users to familiarize themselves with the output produced (
                <xref ref-type="table" rid="T3">Table 3</xref>). In the following, we provide a concise summary of the BEpipeR workflow using the provided input data; for brevity, data sets are referred to by their base ID, and only steps required for understanding the provided example are listed. i) Plot designations and location information in data set 20826 are used to construct the spatially explicit plot IDs template. ii) Species abundance data in 19848 contain a non-valid numerical NA value (-88888888), which is replaced with NA in DW. iii) With the entry &#x2018;None&#x2019;, the abundance data set 19849 contains a non-valid factor level in its plot ID column. This information is excluded through SUB. iv) Species in data set 18269 could not be completely resolved (Genus3_spec), complicating meaningful comparisons between the taxonomic entities in this data set. This is solved by collapsing abundance information at the genus level through FB, followed by reshaping these data to wide format in RES. v) Abundance data in 18526 are not standardized/normalized for sampling effort. Instead, this information is provided in the &#x2018;nobs&#x2019; column of this data set, which is subsequently used to normalize abundance scores and restore the inter-plot comparability of the data. vi) After all data sets have passed QC, the two pH measurements per plot provided in 14447 are summarized plot-wise through DIA. vii) Multi-year abundance data split over the data sets 19848, 19849, and 19850 are summarized at plot and species level through GIA. viii) Multi-year climate data (19007) obtained through BExIS&#x2019; climate tool are processed by CLIM. Note that, because no &#x2018;qualitycounter&#x2019; column is provided, the removal of weakly supported data points is skipped. ix) Amplicon sequencing data in 25067 are first reshaped to wide format, followed by their repeated rarefaction with 150 repetitions, and the calculation of alpha diversity indices. 
Subsequently, all data are left-joined to the plot IDs template (MRG). Notably, because data set 14567 did not require any processing, it is incorporated as is. The resulting composite data set is quality-controlled and filtered (FQC), and variables selection (VS) is performed. Processing is concluded by the compilation and export of metadata (COMD) to the variables in the complete composite data set constructed in FQC.</p>
        </sec>
        <sec id="sec10" sec-type="discussion">
            <title>Discussion</title>
            <p>With BEpipeR, we provide a feature-rich pipeline for processing and synthesizing Biodiversity Exploratories data. To our knowledge, this is the first framework of this consortium to do so in a user-friendly and highly reproducible fashion. We acknowledge that embedding it in the Biodiversity Exploratories with its many projects comes with both challenges and benefits. We recognise that providing a comprehensive framework for the processing of the consortium&#x2019;s many data sets is a daunting task, as many, potentially conflicting, interests need to be satisfied. Hence, for the near-term development of this pipeline, we see the following three focal points for improvement: i) Streamlining the user experience by the improved handling of errors, increasing the pipeline&#x2019;s verbosity, and providing the ability to parse even more aggregation information through the existing parameter files. ii) The extension of existing features, such as data normalization through transformation (e.g., 
                <xref ref-type="bibr" rid="ref30">McKnight et al. 2019</xref>, 
                <xref ref-type="bibr" rid="ref9">Boshuizen and Te Beest 2023</xref>). iii) Increasing rigour in ensuring data integrity by implementing access to BExIS&#x2019; API and, thereby, information obtainable solely through this channel.</p>
            <p>Notably, many re-usability issues BEpipeR corrects for would have been prevented in the first place by adopting more stringent standard operating procedures that ensure data re-usage with minimal user effort. Briefly, we restrict ourselves to the issues encountered most often while working on a subset (~ 150 data sets from 2009 onwards) of the Biodiversity Exploratories&#x2019; information stock: i) Non-harmonized plot information: Data re-usage might be drastically improved by encoding plot information in a unified way. This includes, among others, making the plot ID column the first column of data sets, using unified column names for this type of information, and enforcing the experimental plot scheme with leading zeros throughout. ii) Non-harmonized NA and NODATA values: Consortium-wide non-numerical NA and NODATA values would prevent the (mis)use of numerical values for encoding this information. This issue is exacerbated by the fact that information on these values can only reliably be obtained through BExIS&#x2019; API, a resource most scientists might not be aware of or familiar with. iii) Non-harmonized encoding of factors: Factors should be encoded as character strings to facilitate their detection and prevent aggregation over these values. These and other potential improvements should be accompanied by more stringent quality control and data curation through the Biodiversity Exploratories&#x2019; data management team to prevent mal-formatted, incomplete, or erroneous data sets from being listed as ready-to-use in the BExIS database. We also highlight the need to revise any data sets that may not adhere to these standards. 
While most of these suggestions mean minimal effort for data owners who upload new data sets, and a reasonable yet essential one for this consortium&#x2019;s data management team, they might drastically reduce hands-on time for scientists that re-use these data, and ultimately pave the way to making these data exploitable through large inter-framework databases (
                <xref ref-type="bibr" rid="ref19">Finkel et al. 2020</xref>).</p>
            <p>In constructing BEpipeR, we aimed to balance its specificity to the Biodiversity Exploratories with general applicability. This means that while this pipeline was written to solve numerous Biodiversity Exploratories-specific data issues, it might nevertheless be adapted to the needs of other large research consortia. This might be achieved most easily by, among others, implementing a step that recodes other consortia&#x2019;s plot designations to the Biodiversity Exploratories&#x2019; experimental plot designation scheme, preventing them from having to adjust most regular expression-based pattern matching in BEpipeR. These consortia may also benefit from the modularity of BEpipeR, where each major loop is a well-defined processing step, allowing for straightforward modifications to the workflow. Additionally, parsing-wise, users may tailor paramMAIN to their needs by deleting or replacing all columns not strictly required for BEpipeR&#x2019;s operation (as indicated in the file&#x2019;s dictionary). Finally, changes to BEpipeR&#x2019;s source code are aided by a consistent and traceable naming scheme for variables, as well as detailed comments on the code and the underlying reasoning.</p>
            <p>To conclude, even though this project might be facing substantial challenges, it is the Exploratories&#x2019; large base of researchers and scientific staff that has the potential to render this endeavour a success. People interested can contribute both conceptually, by providing suggestions for future implementations, and preferably, by coding. In the best case, their participation is fuelled by having understood the nature of this framework, that is, its capability to boost each project&#x2019;s data visibility and impact by providing it in a composite data set for the most straightforward re-use possible. As we will demonstrate elsewhere, BEpipeR can be used to generate expansive composite data sets with the potential to further insights into complex evolutionary and ecological matters.</p>
        </sec>
        <sec id="sec12">
            <title>Ethics and consent</title>
            <p>Ethical approval and consent were not required.</p>
        </sec>
    </body>
    <back>
        <sec id="sec15" sec-type="data-availability">
            <title>Data availability</title>
            <p>Example data used in this publication are available as part of BEpipeR on 
                <ext-link ext-link-type="uri" xlink:href="https://github.com/marcelglueck/BEpipeR">GitHub</ext-link> and 
                <ext-link ext-link-type="uri" xlink:href="https://zenodo.org/records/13838117">Zenodo</ext-link> (
                <xref ref-type="bibr" rid="ref22">Gl&#x00fc;ck et al. 2024</xref>).</p>
            <p>Data are available under the terms of the 
                <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution 4.0 International license</ext-link> (CC-BY 4.0).</p>
        </sec>
        <sec id="sec11">
            <title>Software availability</title>
            <p>Software and source code available from: 
                <ext-link ext-link-type="uri" xlink:href="https://github.com/marcelglueck/BEpipeR">https://github.com/marcelglueck/BEpipeR</ext-link>
            </p>
            <p>Archived source code at time of publication: 
                <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.5281/zenodo.13838117">https://doi.org/10.5281/zenodo.13838117</ext-link> (
                <xref ref-type="bibr" rid="ref22">Gl&#x00fc;ck et al. 2024</xref>)</p>
            <p>License: LGPL-3.0</p>
        </sec>
        <ack>
            <title>Acknowledgements</title>
            <p>We thank the Copyright Office of T&#x00fc;bingen University for their assistance in finding a suitable license for this pipeline and Andreas Ziegler for helping with its shipping. We also acknowledge support from the Open Access Publishing Fund of T&#x00fc;bingen University for covering publication fees. Icons in 
                <xref ref-type="fig" rid="f1">Figure 1</xref> were obtained from 
                <ext-link ext-link-type="uri" xlink:href="http://flaticon.com">flaticon.com</ext-link>; in their order of first appearance: xnimrodx, lakonicon, Bharat Icons, gravisio, Ida Desi Mariana, phatplus, Freepik, Kharisma, POD Gladiator, Uniconlabs, Iconjam, KP Arts, Mayor Icons, karthiks_18, and IwitoStudio. This work is based on data obtained within the DFG Priority Program 1374 &#x2018;Infrastructure-Biodiversity-Exploratories&#x2019;. We thank the staff of the three exploratories, the BE office and the BExIS team for their work in maintaining the plot and project infrastructure, and Markus Fischer, the late Elisabeth Kalko, Eduard Linsenmair, Dominik Hessenm&#x00f6;ller, Jens Nieschulze, Daniel Prati, Ingo Sch&#x00f6;ning, Fran&#x00e7;ois Buscot, Ernst-Detlef Schulze, and Wolfgang W. Weisser for their role in setting up the Biodiversity Exploratories project.</p>
        </ack>
        <ref-list>
            <title>References</title>
            <ref id="ref1">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Allan</surname>
                            <given-names>E</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Bossdorf</surname>
                            <given-names>O</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Dormann</surname>
                            <given-names>CF</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Interannual variation in land-use intensity enhances grassland multidiversity.</article-title>
                    <source>

                        <italic toggle="yes">Proc. Natl. Acad. Sci.</italic>
</source>
                    <year>2014</year>;<volume>111</volume>(<issue>1</issue>):<fpage>308</fpage>&#x2013;<lpage>313</lpage>.
                    <pub-id pub-id-type="pmid">24368852</pub-id>
                    <pub-id pub-id-type="doi">10.1073/pnas.1312213111</pub-id>
                    <pub-id pub-id-type="pmcid">PMC3890898</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref2">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Allan</surname>
                            <given-names>E</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Manning</surname>
                            <given-names>P</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Alt</surname>
                            <given-names>F</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Land use intensification alters ecosystem multifunctionality via loss of biodiversity and changes to functional composition.</article-title>
                    <source>

                        <italic toggle="yes">Ecol. Lett.</italic>
</source>
                    <year>2015</year>;<volume>18</volume>(<issue>8</issue>):<fpage>834</fpage>&#x2013;<lpage>843</lpage>.
                    <pub-id pub-id-type="pmid">26096863</pub-id>
                    <pub-id pub-id-type="doi">10.1111/ele.12469</pub-id>
                    <pub-id pub-id-type="pmcid">PMC4744976</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref3">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Anderson-Teixeira</surname>
                            <given-names>KJ</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Davies</surname>
                            <given-names>SJ</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Bennett</surname>
                            <given-names>AC</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>CTFS-Forest GEO: a worldwide network monitoring forests in an era of global change.</article-title>
                    <source>

                        <italic toggle="yes">Glob. Chang. Biol.</italic>
</source>
                    <year>2015</year>;<volume>21</volume>(<issue>2</issue>):<fpage>528</fpage>&#x2013;<lpage>549</lpage>.
                    <pub-id pub-id-type="pmid">25258024</pub-id>
                    <pub-id pub-id-type="doi">10.1111/gcb.12712</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref4">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Baker</surname>
                            <given-names>M</given-names>
                        </name>
</person-group>:
                    <article-title>Reproducibility crisis.</article-title>
                    <source>

                        <italic toggle="yes">Nature.</italic>
</source>
                    <year>2016</year>;<volume>533</volume>(<issue>26</issue>):<fpage>353</fpage>&#x2013;<lpage>366</lpage>.</mixed-citation>
            </ref>
            <ref id="ref5">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Baker</surname>
                            <given-names>M</given-names>
                        </name>
</person-group>:
                    <article-title>Scientific computing: Code alert.</article-title>
                    <source>

                        <italic toggle="yes">Nature.</italic>
</source>
                    <year>2017</year>;<volume>541</volume>(<issue>7638</issue>):<fpage>563</fpage>&#x2013;<lpage>565</lpage>.
                    <pub-id pub-id-type="doi">10.1038/nj7638-563a</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref6">
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Barrett</surname>
                            <given-names>T</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Dowle</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Srinivasan</surname>
                            <given-names>A</given-names>
                        </name>
</person-group>:
                    <article-title>data.table: Extension of &#x2018;data.frame&#x2019;.</article-title>
                    <year>2023</year>.
                    <ext-link ext-link-type="uri" xlink:href="https://CRAN.R-project.org/package=data.table">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref7">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Benjamini</surname>
                            <given-names>Y</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Hochberg</surname>
                            <given-names>Y</given-names>
                        </name>
</person-group>:
                    <article-title>Controlling the false discovery rate: a practical and powerful approach to multiple testing.</article-title>
                    <source>

                        <italic toggle="yes">J. R. Stat. Soc. Series B.</italic>
</source>
                    <year>1995</year>;<volume>57</volume>(<issue>1</issue>):<fpage>289</fpage>&#x2013;<lpage>300</lpage>.
                    <pub-id pub-id-type="doi">10.1111/j.2517-6161.1995.tb02031.x</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref8">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Bl&#x00fc;thgen</surname>
                            <given-names>N</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Simons</surname>
                            <given-names>NK</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Jung</surname>
                            <given-names>K</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Land use imperils plant and animal community stability through changes in asynchrony rather than diversity.</article-title>
                    <source>

                        <italic toggle="yes">Nat. Commun.</italic>
</source>
                    <year>2016</year>;<volume>7</volume>(<issue>1</issue>):<fpage>10697</fpage>.
                    <pub-id pub-id-type="pmid">26869180</pub-id>
                    <pub-id pub-id-type="doi">10.1038/ncomms10697</pub-id>
                    <pub-id pub-id-type="pmcid">PMC4754335</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref9">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Boshuizen</surname>
                            <given-names>HC</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Te Beest</surname>
                            <given-names>DE</given-names>
                        </name>
</person-group>:
                    <article-title>Pitfalls in the statistical analysis of microbiome amplicon sequencing data.</article-title>
                    <source>

                        <italic toggle="yes">Mol. Ecol. Resour.</italic>
</source>
                    <year>2023</year>;<volume>23</volume>(<issue>3</issue>):<fpage>539</fpage>&#x2013;<lpage>548</lpage>.
                    <pub-id pub-id-type="pmid">36330663</pub-id>
                    <pub-id pub-id-type="doi">10.1111/1755-0998.13730</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref10">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Cameron</surname>
                            <given-names>ES</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Schmidt</surname>
                            <given-names>PJ</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Tremblay</surname>
                            <given-names>BJ-M</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Enhancing diversity analysis by repeatedly rarefying next generation sequencing data describing microbial communities.</article-title>
                    <source>

                        <italic toggle="yes">Sci. Rep.</italic>
</source>
                    <year>2021</year>;<volume>11</volume>(<issue>1</issue>):<fpage>22302</fpage>.
                    <pub-id pub-id-type="pmid">34785722</pub-id>
                    <pub-id pub-id-type="doi">10.1038/s41598-021-01636-1</pub-id>
                    <pub-id pub-id-type="pmcid">PMC8595385</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref11">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Caporaso</surname>
                            <given-names>JG</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Kuczynski</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Stombaugh</surname>
                            <given-names>J</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>QIIME allows analysis of high-throughput community sequencing data.</article-title>
                    <source>

                        <italic toggle="yes">Nat. Methods.</italic>
</source>
                    <year>2010</year>;<volume>7</volume>(<issue>5</issue>):<fpage>335</fpage>&#x2013;<lpage>336</lpage>.
                    <pub-id pub-id-type="pmid">20383131</pub-id>
                    <pub-id pub-id-type="doi">10.1038/nmeth.f.303</pub-id>
                    <pub-id pub-id-type="pmcid">PMC3156573</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref12">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Chamanara</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Gaikwad</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Gerlach</surname>
                            <given-names>R</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>BEXIS2: A FAIR-aligned data management system for biodiversity, ecology and environmental data.</article-title>
                    <source>

                        <italic toggle="yes">Biodivers. Data J.</italic>
</source>
                    <year>2021</year>;<volume>9</volume>:<fpage>e72901</fpage>.
                    <pub-id pub-id-type="pmid">34785977</pub-id>
                    <pub-id pub-id-type="doi">10.3897/BDJ.9.e72901</pub-id>
                    <pub-id pub-id-type="pmcid">PMC8589773</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref13">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Chavarria</surname>
                            <given-names>KA</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Saltonstall</surname>
                            <given-names>K</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Vinda</surname>
                            <given-names>J</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Land use influences stream bacterial communities in lowland tropical watersheds.</article-title>
                    <source>

                        <italic toggle="yes">Sci. Rep.</italic>
</source>
                    <year>2021</year>;<volume>11</volume>(<issue>1</issue>):<fpage>21752</fpage>.
                    <pub-id pub-id-type="pmid">34741067</pub-id>
                    <pub-id pub-id-type="doi">10.1038/s41598-021-01193-7</pub-id>
                    <pub-id pub-id-type="pmcid">PMC8571290</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref14">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Chiarello</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>McCauley</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Vill&#x00e9;ger</surname>
                            <given-names>S</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Ranking the biases: The choice of OTUs vs. ASVs in 16S rRNA amplicon data analysis has stronger effects on diversity measures than rarefaction and OTU identity threshold.</article-title>
                    <source>

                        <italic toggle="yes">PLoS One.</italic>
</source>
                    <year>2022</year>;<volume>17</volume>(<issue>2</issue>):<fpage>e0264443</fpage>.
                    <pub-id pub-id-type="pmid">35202411</pub-id>
                    <pub-id pub-id-type="doi">10.1371/journal.pone.0264443</pub-id>
                    <pub-id pub-id-type="pmcid">PMC8870492</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref15">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Custer</surname>
                            <given-names>GF</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Diepen</surname>
                            <given-names>LT</given-names>
                            <prefix>van</prefix>
                        </name>

                        <name name-style="western">
                            <surname>Seeley</surname>
                            <given-names>J</given-names>
                        </name>
</person-group>:
                    <article-title>Student perceptions towards introductory lessons in R.</article-title>
                    <source>

                        <italic toggle="yes">Nat. Sci. Educ.</italic>
</source>
                    <year>2021</year>;<volume>50</volume>(<issue>2</issue>):<fpage>e20073</fpage>.
                    <pub-id pub-id-type="doi">10.1002/nse2.20073</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref16">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Davies</surname>
                            <given-names>SJ</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Abiem</surname>
                            <given-names>I</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Salim</surname>
                            <given-names>KA</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>ForestGEO: Understanding forest diversity and dynamics through a global observatory network.</article-title>
                    <source>

                        <italic toggle="yes">Biol. Conserv.</italic>
</source>
                    <year>2021</year>;<volume>253</volume>:<fpage>108907</fpage>.
                    <pub-id pub-id-type="doi">10.1016/j.biocon.2020.108907</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref17">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Dixon</surname>
                            <given-names>P</given-names>
                        </name>
</person-group>:
                    <article-title>VEGAN, a package of R functions for community ecology.</article-title>
                    <source>

                        <italic toggle="yes">J. Veg. Sci.</italic>
</source>
                    <year>2003</year>;<volume>14</volume>(<issue>6</issue>):<fpage>927</fpage>&#x2013;<lpage>930</lpage>.
                    <pub-id pub-id-type="doi">10.1111/j.1654-1103.2003.tb02228.x</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref18">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Felipe-Lucia</surname>
                            <given-names>MR</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Soliveres</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Penone</surname>
                            <given-names>C</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Land-use intensity alters networks between biodiversity, ecosystem functions, and services.</article-title>
                    <source>

                        <italic toggle="yes">Proc. Natl. Acad. Sci.</italic>
</source>
                    <year>2020</year>;<volume>117</volume>(<issue>45</issue>):<fpage>28140</fpage>&#x2013;<lpage>28149</lpage>.
                    <pub-id pub-id-type="pmid">33093203</pub-id>
                    <pub-id pub-id-type="doi">10.1073/pnas.2016210117</pub-id>
                    <pub-id pub-id-type="pmcid">PMC7668166</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref19">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Finkel</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Baur</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Weber</surname>
                            <given-names>TK</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Managing collaborative research data for integrated, interdisciplinary environmental research.</article-title>
                    <source>

                        <italic toggle="yes">Earth Sci. Inf.</italic>
</source>
                    <year>2020</year>;<volume>13</volume>:<fpage>641</fpage>&#x2013;<lpage>654</lpage>.
                    <pub-id pub-id-type="doi">10.1007/s12145-020-00441-0</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref20">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Fischer</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Bossdorf</surname>
                            <given-names>O</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Gockel</surname>
                            <given-names>S</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Implementing large-scale and long-term functional biodiversity research: The Biodiversity Exploratories.</article-title>
                    <source>

                        <italic toggle="yes">Basic Appl. Ecol.</italic>
</source>
                    <year>2010a</year>;<volume>11</volume>(<issue>6</issue>):<fpage>473</fpage>&#x2013;<lpage>485</lpage>.
                    <pub-id pub-id-type="doi">10.1016/j.baae.2010.07.009</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref21">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Fischer</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Kalko</surname>
                            <given-names>EK</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Linsenmair</surname>
                            <given-names>KE</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Exploratories for large-scale and long-term functional biodiversity research.</article-title>
                    <source>

                        <italic toggle="yes">Long-Term Ecological Research: Between Theory and Application.</italic>
</source>
                    <year>2010b</year>;<fpage>429</fpage>&#x2013;<lpage>443</lpage>.
                    <pub-id pub-id-type="doi">10.1007/978-90-481-8782-9_29</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref22">
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Gl&#x00fc;ck</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Bossdorf</surname>
                            <given-names>O</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Thomassen</surname>
                            <given-names>HA</given-names>
                        </name>
</person-group>:
                    <article-title>BEpipeR: a user-friendly, flexible, and scalable data synthesis pipeline for the Biodiversity Exploratories and other research consortia.</article-title>
                    <source>

                        <italic toggle="yes">Zenodo.</italic>
</source>
                    <year>2024</year>.
                    <pub-id pub-id-type="doi">10.5281/zenodo.13838117</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref23">
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Harrell</surname>
                            <given-names>F</given-names>
                        </name>
</person-group>:
                    <article-title>Hmisc: Harrell Miscellaneous.</article-title>
                    <year>2023</year>.
                    <ext-link ext-link-type="uri" xlink:href="https://CRAN.R-project.org/package=Hmisc">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref24">
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Hijmans</surname>
                            <given-names>RJ</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Bivand</surname>
                            <given-names>R</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Forner</surname>
                            <given-names>K</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Package &#x2018;terra&#x2019;.</article-title>
                    <year>2022</year>.
                    <ext-link ext-link-type="uri" xlink:href="https://CRAN.R-project.org/package=terra">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref25">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Hobbie</surname>
                            <given-names>JE</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Carpenter</surname>
                            <given-names>SR</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Grimm</surname>
                            <given-names>NB</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>The US long term ecological research program.</article-title>
                    <source>

                        <italic toggle="yes">Bioscience.</italic>
</source>
                    <year>2003</year>;<volume>53</volume>(<issue>1</issue>):<fpage>21</fpage>&#x2013;<lpage>32</lpage>.
                    <pub-id pub-id-type="doi">10.1641/0006-3568(2003)053[0021:TULTER]2.0.CO;2</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref26">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Kloss</surname>
                            <given-names>L</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Fischer</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Durka</surname>
                            <given-names>W</given-names>
                        </name>
</person-group>:
                    <article-title>Land-use effects on genetic structure of a common grassland herb: a matter of scale.</article-title>
                    <source>

                        <italic toggle="yes">Basic Appl. Ecol.</italic>
</source>
                    <year>2011</year>;<volume>12</volume>(<issue>5</issue>):<fpage>440</fpage>&#x2013;<lpage>448</lpage>.
                    <pub-id pub-id-type="doi">10.1016/j.baae.2011.06.001</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref27">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Le Provost</surname>
                            <given-names>G</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Schenk</surname>
                            <given-names>NV</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Penone</surname>
                            <given-names>C</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>The supply of multiple ecosystem services requires biodiversity across spatial scales.</article-title>
                    <source>

                        <italic toggle="yes">Nat. Ecol. Evol.</italic>
</source>
                    <year>2023</year>;<volume>7</volume>(<issue>2</issue>):<fpage>236</fpage>&#x2013;<lpage>249</lpage>.
                    <pub-id pub-id-type="pmid">36376602</pub-id>
                    <pub-id pub-id-type="doi">10.1038/s41559-022-01918-5</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref28">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Lin</surname>
                            <given-names>H</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Peddada</surname>
                            <given-names>SD</given-names>
                        </name>
</person-group>:
                    <article-title>Analysis of microbial compositions: a review of normalization and differential abundance analysis.</article-title>
                    <source>

                        <italic toggle="yes">NPJ Biofilms Microbiomes.</italic>
</source>
                    <year>2020</year>;<volume>6</volume>(<issue>1</issue>):<fpage>60</fpage>.
                    <pub-id pub-id-type="pmid">33268781</pub-id>
                    <pub-id pub-id-type="doi">10.1038/s41522-020-00160-w</pub-id>
                    <pub-id pub-id-type="pmcid">PMC7710733</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref29">
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Margalef</surname>
                            <given-names>R</given-names>
                        </name>
</person-group>:
                    <article-title>Information theory in ecology.</article-title>
                    <year>1973</year>.</mixed-citation>
            </ref>
            <ref id="ref30">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>McKnight</surname>
                            <given-names>DT</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Huerlimann</surname>
                            <given-names>R</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Bower</surname>
                            <given-names>DS</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Methods for normalizing microbiome data: an ecological perspective.</article-title>
                    <source>

                        <italic toggle="yes">Methods Ecol. Evol.</italic>
</source>
                    <year>2019</year>;<volume>10</volume>(<issue>3</issue>):<fpage>389</fpage>&#x2013;<lpage>400</lpage>.
                    <pub-id pub-id-type="doi">10.1111/2041-210X.13115</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref31">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>McMurdie</surname>
                            <given-names>PJ</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Holmes</surname>
                            <given-names>S</given-names>
                        </name>
</person-group>:
                    <article-title>Waste not, want not: why rarefying microbiome data is inadmissible.</article-title>
                    <source>

                        <italic toggle="yes">PLoS Comput. Biol.</italic>
</source>
                    <year>2014</year>;<volume>10</volume>(<issue>4</issue>):<fpage>e1003531</fpage>.
                    <pub-id pub-id-type="pmid">24699258</pub-id>
                    <pub-id pub-id-type="doi">10.1371/journal.pcbi.1003531</pub-id>
                    <pub-id pub-id-type="pmcid">PMC3974642</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref32">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Menhinick</surname>
                            <given-names>EF</given-names>
                        </name>
</person-group>:
                    <article-title>A comparison of some species-individuals diversity indices applied to samples of field insects.</article-title>
                    <source>

                        <italic toggle="yes">Ecology.</italic>
</source>
                    <year>1964</year>;<volume>45</volume>(<issue>4</issue>):<fpage>859</fpage>&#x2013;<lpage>861</lpage>.
                    <pub-id pub-id-type="doi">10.2307/1934933</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref33">
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <collab>Microsoft Corporation</collab>

                        <name name-style="western">
                            <surname>Weston</surname>
                            <given-names>S</given-names>
                        </name>
</person-group>:
                    <article-title>doSNOW: Foreach Parallel Adaptor for the &#x2018;snow&#x2019; Package.</article-title>
                    <year>2022</year>.
                    <ext-link ext-link-type="uri" xlink:href="https://CRAN.R-project.org/package=doSNOW">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref34">
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>M&#x00fc;ller</surname>
                            <given-names>K</given-names>
                        </name>
</person-group>:
                    <article-title>here: A Simpler Way to Find Your Files.</article-title>
                    <year>2020</year>.
                    <ext-link ext-link-type="uri" xlink:href="https://CRAN.R-project.org/package=here">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref35">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Naimi</surname>
                            <given-names>B</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Hamm</surname>
                            <given-names>NA</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Groen</surname>
                            <given-names>TA</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Where is positional uncertainty a problem for species distribution modelling?</article-title>
                    <source>

                        <italic toggle="yes">Ecography.</italic>
</source>
                    <year>2014</year>;<volume>37</volume>(<issue>2</issue>):<fpage>191</fpage>&#x2013;<lpage>203</lpage>.</mixed-citation>
            </ref>
            <ref id="ref36">
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Ooms</surname>
                            <given-names>J</given-names>
                        </name>
</person-group>:
                    <article-title>The jsonlite package: A practical and consistent mapping between json data and r objects.</article-title>
                    <year>2014</year>. arXiv preprint arXiv:1403.2805.</mixed-citation>
            </ref>
            <ref id="ref37">
                <mixed-citation publication-type="other">
                    <collab>R Core Team</collab>:
                    <article-title>R: A Language and Environment for Statistical Computing.</article-title>
                    <year>2021</year>.
                    <ext-link ext-link-type="uri" xlink:href="https://www.R-project.org/">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref38">
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Racine</surname>
                            <given-names>JS</given-names>
                        </name>
</person-group>:
                    <article-title>RStudio: a platform-independent IDE for R and Sweave, JSTOR.</article-title>
                    <year>2012</year>.</mixed-citation>
            </ref>
            <ref id="ref39">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Rovero</surname>
                            <given-names>F</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ahumada</surname>
                            <given-names>J</given-names>
                        </name>
</person-group>:
                    <article-title>The Tropical Ecology, Assessment and Monitoring (TEAM) Network: An early warning system for tropical rain forests.</article-title>
                    <source>

                        <italic toggle="yes">Sci. Total Environ.</italic>
</source>
                    <year>2017</year>;<volume>574</volume>:<fpage>914</fpage>&#x2013;<lpage>923</lpage>.
                    <pub-id pub-id-type="pmid">27665451</pub-id>
                    <pub-id pub-id-type="doi">10.1016/j.scitotenv.2016.09.146</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref40">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Saary</surname>
                            <given-names>P</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Forslund</surname>
                            <given-names>K</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Bork</surname>
                            <given-names>P</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>RTK: efficient rarefaction analysis of large datasets.</article-title>
                    <source>

                        <italic toggle="yes">Bioinformatics.</italic>
</source>
                    <year>2017</year>;<volume>33</volume>(<issue>16</issue>):<fpage>2594</fpage>&#x2013;<lpage>2595</lpage>.
                    <pub-id pub-id-type="pmid">28398468</pub-id>
                    <pub-id pub-id-type="doi">10.1093/bioinformatics/btx206</pub-id>
                    <pub-id pub-id-type="pmcid">PMC5870771</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref41">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Schloss</surname>
                            <given-names>PD</given-names>
                        </name>
</person-group>:
                    <article-title>Rarefaction is currently the best approach to control for uneven sequencing effort in amplicon sequence analyses.</article-title>
                    <source>

                        <italic toggle="yes">mSphere.</italic>
</source>
                    <year>2024</year>;<fpage>e00354-23</fpage>.</mixed-citation>
            </ref>
            <ref id="ref42">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Schloss</surname>
                            <given-names>PD</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Westcott</surname>
                            <given-names>SL</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ryabin</surname>
                            <given-names>T</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Introducing mothur: open-source, platform-independent, community-supported software for describing and comparing microbial communities.</article-title>
                    <source>

                        <italic toggle="yes">Appl. Environ. Microbiol.</italic>
</source>
                    <year>2009</year>;<volume>75</volume>(<issue>23</issue>):<fpage>7537</fpage>&#x2013;<lpage>7541</lpage>.
                    <pub-id pub-id-type="pmid">19801464</pub-id>
                    <pub-id pub-id-type="doi">10.1128/AEM.01541-09</pub-id>
                    <pub-id pub-id-type="pmcid">PMC2786419</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref43">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Seibold</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Gossner</surname>
                            <given-names>MM</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Simons</surname>
                            <given-names>NK</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Arthropod decline in grasslands and forests is associated with landscape-level drivers.</article-title>
                    <source>

                        <italic toggle="yes">Nature.</italic>
</source>
                    <year>2019</year>;<volume>574</volume>(<issue>7780</issue>):<fpage>671</fpage>&#x2013;<lpage>674</lpage>.
                    <pub-id pub-id-type="pmid">31666721</pub-id>
                    <pub-id pub-id-type="doi">10.1038/s41586-019-1684-3</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref44">
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Shannon</surname>
                            <given-names>CE</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Weaver</surname>
                            <given-names>W</given-names>
                        </name>
</person-group>:
                    <source>

                        <italic toggle="yes">The mathematical theory of communication.</italic>
</source>
                    <publisher-name>University of Illinois Press</publisher-name>;<year>1949</year>.</mixed-citation>
            </ref>
            <ref id="ref45">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Simpson</surname>
                            <given-names>EH</given-names>
                        </name>
</person-group>:
                    <article-title>Measurement of diversity.</article-title>
                    <source>

                        <italic toggle="yes">Nature.</italic>
</source>
                    <year>1949</year>;<volume>163</volume>(<issue>4148</issue>):<fpage>688</fpage>&#x2013;<lpage>688</lpage>.
                    <pub-id pub-id-type="doi">10.1038/163688a0</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref46">
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Ushey</surname>
                            <given-names>K</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Wickham</surname>
                            <given-names>H</given-names>
                        </name>
</person-group>:
                    <article-title>renv: Project Environments.</article-title>
                    <year>2023</year>.
                    <ext-link ext-link-type="uri" xlink:href="https://CRAN.R-project.org/package=renv">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref47">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>V&#x00e1;lyi</surname>
                            <given-names>K</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Rillig</surname>
                            <given-names>MC</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Hempel</surname>
                            <given-names>S</given-names>
                        </name>
</person-group>:
                    <article-title>Land-use intensity and host plant identity interactively shape communities of arbuscular mycorrhizal fungi in roots of grassland plants.</article-title>
                    <source>

                        <italic toggle="yes">New Phytol.</italic>
</source>
                    <year>2015</year>;<volume>205</volume>(<issue>4</issue>):<fpage>1577</fpage>&#x2013;<lpage>1586</lpage>.
                    <pub-id pub-id-type="pmid">25545193</pub-id>
                    <pub-id pub-id-type="doi">10.1111/nph.13236</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref48">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Breugel</surname>
                            <given-names>M</given-names>
                            <prefix>van</prefix>
                        </name>

                        <name name-style="western">
                            <surname>Craven</surname>
                            <given-names>D</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Lai</surname>
                            <given-names>HR</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Soil nutrients and dispersal limitation shape compositional variation in secondary tropical forests across multiple scales.</article-title>
                    <source>

                        <italic toggle="yes">J. Ecol.</italic>
</source>
                    <year>2019</year>;<volume>107</volume>(<issue>2</issue>):<fpage>566</fpage>&#x2013;<lpage>581</lpage>.
                    <pub-id pub-id-type="doi">10.1111/1365-2745.13126</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref49">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Walters</surname>
                            <given-names>KE</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Martiny</surname>
                            <given-names>JB</given-names>
                        </name>
</person-group>:
                    <article-title>Alpha-, beta-, and gamma-diversity of bacteria varies across habitats.</article-title>
                    <source>

                        <italic toggle="yes">PLoS One.</italic>
</source>
                    <year>2020</year>;<volume>15</volume>(<issue>9</issue>):<fpage>e0233872</fpage>.
                    <pub-id pub-id-type="pmid">32966309</pub-id>
                    <pub-id pub-id-type="doi">10.1371/journal.pone.0233872</pub-id>
                    <pub-id pub-id-type="pmcid">PMC7510982</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref50">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Weiner</surname>
                            <given-names>CN</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Werner</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Linsenmair</surname>
                            <given-names>KE</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Land-use impacts on plant&#x2013;pollinator networks: interaction strength and specialization predict pollinator declines.</article-title>
                    <source>

                        <italic toggle="yes">Ecology.</italic>
</source>
                    <year>2014</year>;<volume>95</volume>(<issue>2</issue>):<fpage>466</fpage>&#x2013;<lpage>474</lpage>.
                    <pub-id pub-id-type="pmid">24669739</pub-id>
                    <pub-id pub-id-type="doi">10.1890/13-0436.1</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref51">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Weiss</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Xu</surname>
                            <given-names>ZZ</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Peddada</surname>
                            <given-names>S</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Normalization and microbial differential abundance strategies depend upon data characteristics.</article-title>
                    <source>

                        <italic toggle="yes">Microbiome.</italic>
</source>
                    <year>2017</year>;<volume>5</volume>:<fpage>1</fpage>&#x2013;<lpage>18</lpage>.</mixed-citation>
            </ref>
            <ref id="ref52">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Wickham</surname>
                            <given-names>H</given-names>
                        </name>
</person-group>:
                    <article-title>The split-apply-combine strategy for data analysis.</article-title>
                    <source>

                        <italic toggle="yes">J. Stat. Softw.</italic>
</source>
                    <year>2011</year>;<volume>40</volume>:<fpage>1</fpage>&#x2013;<lpage>29</lpage>.</mixed-citation>
            </ref>
            <ref id="ref53">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Wickham</surname>
                            <given-names>H</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Averick</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Bryan</surname>
                            <given-names>J</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Welcome to the Tidyverse.</article-title>
                    <source>

                        <italic toggle="yes">J. Open Source Softw.</italic>
</source>
                    <year>2019</year>;<volume>4</volume>(<issue>43</issue>):<fpage>1686</fpage>.
                    <pub-id pub-id-type="doi">10.21105/joss.01686</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref54">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>W&#x00f6;llauer</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Zeuss</surname>
                            <given-names>D</given-names>
                        </name>

                        <name name-style="western">
                            <surname>H&#x00e4;nsel</surname>
                            <given-names>F</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>TubeDB: An on-demand processing database system for climate station data.</article-title>
                    <source>

                        <italic toggle="yes">Comput. Geosci.</italic>
</source>
                    <year>2021</year>;<volume>146</volume>:<fpage>104641</fpage>.
                    <pub-id pub-id-type="doi">10.1016/j.cageo.2020.104641</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref55">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Xia</surname>
                            <given-names>Y</given-names>
                        </name>
</person-group>:
                    <article-title>Statistical normalization methods in microbiome data with application to microbiome cancer research.</article-title>
                    <source>

                        <italic toggle="yes">Gut Microbes.</italic>
</source>
                    <year>2023</year>;<volume>15</volume>(<issue>2</issue>):<fpage>2244139</fpage>.
                    <pub-id pub-id-type="pmid">37622724</pub-id>
                    <pub-id pub-id-type="doi">10.1080/19490976.2023.2244139</pub-id>
                    <pub-id pub-id-type="pmcid">PMC10461514</pub-id>
                </mixed-citation>
            </ref>
        </ref-list>
    </back>
    <sub-article article-type="reviewer-report" id="report351632">
        <front-stub>
            <article-id pub-id-type="doi">10.5256/f1000research.172574.r351632</article-id>
            <title-group>
                <article-title>Reviewer response for version 1</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author">
                    <name>
                        <surname>Greni&#x00e9;</surname>
                        <given-names>Matthias</given-names>
                    </name>
                    <xref ref-type="aff" rid="r351632a1">1</xref>
                    <xref ref-type="aff" rid="r351632a2">2</xref>
                    <role>Referee</role>
                    <uri content-type="orcid">https://orcid.org/0000-0002-4659-7522</uri>
                </contrib>
                <aff id="r351632a1">
                    <label>1</label>Laboratoire d'Ecologie Alpine (Ringgold ID: 56837), Grenoble, Auvergne-Rh&#x00f4;ne-Alpes, France</aff>
                <aff id="r351632a2">
                    <label>2</label>Universit&#x00e9; Grenoble Alpes, Saint-Martin-d'H&#x00e8;res, Auvergne-Rh&#x00f4;ne-Alpes, France</aff>
            </contrib-group>
            <author-notes>
                <fn fn-type="conflict">
                    <p>
                        <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>23</day>
                <month>1</month>
                <year>2025</year>
            </pub-date>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2025 Greni&#x00e9; M</copyright-statement>
                <copyright-year>2025</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access peer review report distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <related-article ext-link-type="doi" id="relatedArticleReport351632" related-article-type="peer-reviewed-article" xlink:href="10.12688/f1000research.157160.1"/>
            <custom-meta-group>
                <custom-meta>
                    <meta-name>recommendation</meta-name>
                    <meta-value>reject</meta-value>
                </custom-meta>
            </custom-meta-group>
        </front-stub>
        <body>
            <p>I was asked to review the BEPipeR manuscript, as a great tool for the Biodiversity Exploratories. While clearly a huge amount of work was put into designing and creating the pipeline, I do have strong reservations regarding its ease of use and clarity as a tool.</p>
            <p> </p>
            <p> One first question I had while reading the manuscript was to know what BEPipeR was. From reading the manuscript, this wasn't clear from the beginning. I would have expected BEPipeR to be an R package or a shiny application, as it aimed to simplify data access and analysis for the user. I understood, after finishing the introduction that it was indeed an R pipeline, but I was wondering what the added benefit was compared to an R package or a shiny app? It seems to me that a pipeline is more brittle to change than a well-defined R package. It's also harder to distribute as it doesn't use the main mechanism to spread analyses through R, which is R packages. It also doesn't leverage the "Research Compendium" suggested by Marwick et al. (2018) and others at the same time (see https://github.com/benmarwick/rrtools). That infrastructure tries to provide the best of both worlds between R packages and pipelines. Research compendia can be spread through https://docs.r-universe.dev/ for easy install and leveraging the default installation mechanism of R. I think the author should also be clear why choosing not to create an R package was better to help users and instead creating a full pipeline with many specificities.</p>
            <p> </p>
            <p> One main question I think the authors should take some time to answer is to identify their target audience. Who are they addressing? Are the users going to be people comfortable editing an R script? Would they rather edit a clear configuration file? How about people who want to tweak their own pipeline? I think this work should be preliminary to building such a complex tool, as BEPipeR is targeting several user types without clearly facilitating their work: you both have to edit the R script for specific values and get your own configuration files. One good way of identifying the target audience is to run test runs of the software with potential users to gather feedback about what was clear and what wasn't. This is invaluable when building a tool, to make sure what you're creating is (1) going to serve the users' needs; (2) be used; (3) easy to use.</p>
            <p> </p>
            <p> Whether it was within the manuscript or the GitHub repository, I found it difficult to get a quick overview of what was achievable with BEPipeR. Before following the entire script it isn't clear what the main functions of BEPipeR are. What are the core functions of BEPipeR? You could show quick snippets of plots generated through the BEPipeR pipeline or data analyzed with it on the GitHub page or within the manuscript. The fact that I had to manually go through the script to understand what was achieved concretely by BEPipeR prevents me from using it as I didn't know how it could help beforehand. In addition to "Setting up" instructions, you could provide a "Where to start" section to clearly define the goals and possibilities offered by BEPipeR. Also, these instructions should be available within the GitHub repository, as not all users are going to refer to the manuscript to understand the basic features of BEPipeR. One thing that would be nice would be to see where BEPipeR stands in terms of the analysis pipeline of the Biodiversity Exploratories datasets.</p>
            <p> </p>
            <p> Regarding installation of BEPipeR, I had a hard time setting up with the exact version of R and thus installing the pipeline; I had to install <ext-link ext-link-type="uri" xlink:href="https://github.com/r-lib/rig">rig</ext-link> to be able to switch R versions as R 4.1.1 is already considered quite old. I had to use admin privileges to install `rig`, then use a command line instruction to install the specific R version, then load RStudio with the correct version of R, then restore the `renv` file. I'm unsure if the average user of the pipeline would be confident to perform such a long and complicated process. Also, it doesn't seem to be a good idea to have several R versions lying around your computer. This can definitely confuse your users. While I understand the need to fix package version to ensure maximum reproducibility, I urge the authors to point the users to a "simple" way of doing so. After about an hour of struggle, I managed to install all versions of needed software (previous R version, previous version of RTools, and the `renv` environment). I really wonder to what extent this process is easily extendable to other users, and this, to me, emphasizes the fact that building an R package with clearly defined package versions would make the full process easier. Also, if BEPipeR requires all that, while needing an R connection, I don't understand why it requires to manually download the GADM German gpkg file instead of providing a function to download it to the right place. Also, it puts the burden on the users' shoulders, as they have not only to download it but also rename it properly and move it to the appropriate folder. A user can stumble over any of these steps.</p>
            <p> </p>
            <p> Regarding the pipeline, as advertised in the manuscript as "user-friendly processing of data sets", I was expecting a clear separation between the features provided by BEPipeR and the script that needs to be tweaked by the users. What I found was quite hard to understand: a 3000-line R script, with both user-input values mixed up with actual functional code to process the dataset. I found this structure impossibly complex for the feature. I would have expected some functionality to be "packaged" somehow, if not in an R package, at least in some R/ folders with local functions. Understanding what parts of this script were to be modified and which ones shouldn't was quite difficult. Even though everything was explained in much detail in the manuscript, I had trouble connecting the manuscript to the actual script. I would have expected the manuscript to more explicitly show parts of the script to explain how it works. Instead the manuscript goes to great lengths explaining what the script does, without explaining how to use it! For example, the param*.csv files are loaded in the script at three different locations, why is that? Do you expect the user to go through these three locations to understand what's happening? The three files could be loaded at the top of the script to easily separate between user input and data loaded automatically. Also, even though there are dictionaries provided for the design of each of these files, the structure for variable selection is quite hard to understand, as is what these files are and how to create them. I think extensive documentation of these files, with verbal description and schemes showing how they connect to the pipeline would help. The three param files are mentioned in Figure 1, but it's really unclear how they connect to the pipeline exactly. What do they provide and how are they reused throughout the analysis?</p>
            <p> </p>
            <p> Regarding script structure, one thing possible would be to divide the script into smaller scripts to abstract away some details and better structure the code. With each script calling common functions, or at least calling one another. The main script doesn't follow RStudio's syntax to create clear parts that the user can jump through using the "Show Document Outline" feature, the convention is that any comment in R followed by text and four times the same character is considered "a part" of the script. This makes navigating the script harder, especially if you're expecting the user to use RStudio.</p>
            <p> </p>
            <p> Another thing that puzzled me, whether in the manuscript or the software documentation, is that nowhere was it explained how I should run the script. Should I run `source()` directly on the main script? With which working directory? Which parameters should I make sure to have changed? Should I run the script line-by-line or section by section? This should be clearly stated as a naive user would want to know this information rapidly when downloading the pipeline.</p>
            <p> </p>
            <p> While the paper is a great companion to explain how to use BEPipeR, I think the pipeline should be self-sufficient and contain enough documentation to be able to operate the pipeline without referring to external documentation. The documentation should be extensive about the steps, both about what analyses are performed but also, and it's quite important for a pipeline, how to perform them, with which possible options.</p>
            <p> </p>
            <p> Have you tested it against users? What was their feedback? I would be curious to know as I expect the use of such a script to be quite complex for naive users. I would definitely recommend the authors to simplify the features they want to work on, to compartmentalize their pipeline through well-defined functions made available to the users, and simplify the final script exposed to the user, with an easy-to-run routine. Like "run this main script that is going to call internal functions". The authors could also look into building a <ext-link ext-link-type="uri" xlink:href="https://docs.ropensci.org/targets/">targets</ext-link> pipeline for ease of reproducibility.</p>
            <p> </p>
            <p> I had two additional small remarks: the first one is I congratulate the authors for having thought about archiving their software on Zenodo in addition to GitHub as it allows near-permanent archival. The second is that I enjoyed seeing the hexagonal logo of BEPipeR but it doesn't seem specific enough to be related to Biodiversity Exploratories. Maybe you could think about a logo that reuses parts of the graphical identity of Biodiversity Exploratories.</p>
            <p> </p>
            <p> <bold>References</bold></p>
            <p> 1.)&#x00a0;Marwick et al., 2018 (Ref 1)</p>
            <p>Are the conclusions about the tool and its performance adequately supported by the findings presented in the article?</p>
            <p>Partly</p>
            <p>Is the rationale for developing the new software tool clearly explained?</p>
            <p>Partly</p>
            <p>Is the description of the software tool technically sound?</p>
            <p>Partly</p>
            <p>Are sufficient details of the code, methods and analysis (if applicable) provided to allow replication of the software development and its use by others?</p>
            <p>Partly</p>
            <p>Is sufficient information provided to allow interpretation of the expected output datasets and any results generated using the tool?</p>
            <p>Partly</p>
            <p>Reviewer Expertise:</p>
            <p>biodiversity metrics, biodiversity tools</p>
            <p>I confirm that I have read this submission and believe that I have an appropriate level of expertise to state that I do not consider it to be of an acceptable scientific standard, for reasons outlined above.</p>
        </body>
        <back>
            <ref-list>
                <title>References</title>
                <ref id="rep-ref-351632-1">
                    <label>1</label>
                    <mixed-citation publication-type="journal">
                        <person-group person-group-type="author"/>:
                        <article-title>Packaging data analytical work reproducibly using R (and friends)</article-title>.<year>2018</year>;
                        <elocation-id>10.7287/peerj.preprints.3192v2</elocation-id>
                        <pub-id pub-id-type="doi">10.7287/peerj.preprints.3192v2</pub-id>
                    </mixed-citation>
                </ref>
            </ref-list>
        </back>
    </sub-article>
    <sub-article article-type="reviewer-report" id="report341491">
        <front-stub>
            <article-id pub-id-type="doi">10.5256/f1000research.172574.r341491</article-id>
            <title-group>
                <article-title>Reviewer response for version 1</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author">
                    <name>
                        <surname>Gould</surname>
                        <given-names>Elliot</given-names>
                    </name>
                    <xref ref-type="aff" rid="r341491a1">1</xref>
                    <role>Referee</role>
                    <uri content-type="orcid">https://orcid.org/0000-0002-6585-538X</uri>
                </contrib>
                <aff id="r341491a1">
                    <label>1</label>The University of Melbourne, Melbourne, Victoria, Australia</aff>
            </contrib-group>
            <author-notes>
                <fn fn-type="conflict">
                    <p>
                        <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>10</day>
                <month>12</month>
                <year>2024</year>
            </pub-date>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2024 Gould E</copyright-statement>
                <copyright-year>2024</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access peer review report distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <related-article ext-link-type="doi" id="relatedArticleReport341491" related-article-type="peer-reviewed-article" xlink:href="10.12688/f1000research.157160.1"/>
            <custom-meta-group>
                <custom-meta>
                    <meta-name>recommendation</meta-name>
                    <meta-value>approve-with-reservations</meta-value>
                </custom-meta>
            </custom-meta-group>
        </front-stub>
        <body>
            <p>
                <bold>Summary:</bold>
            </p>
            <p> </p>
            <p> Research Consortia involved in large-scale long-term environmental research frameworks, such as Biodiversity Exploratories, continuously accrue vast amounts of data. However, a great deal of effort, technical expertise, and data processing infrastructure is required to leverage these data to answer even trivial research questions, let alone more intricate questions.&#x00a0;</p>
            <p> </p>
            <p> Gl&#x00fc;ck et al. have developed the BEpipeR tool to facilitate the streamlined synthesis of plot-based Biodiversity Exploratories data. BEpipeR is written in R, one of the most (if not <italic>the most</italic>) popular programming languages in ecology. The approach taken by BEpipeR is a template-based approach whereby the specifications for the data-processing and synthesis pipeline are provided as parameters within .csv files, which are then read in as R objects provided to the main pipeline script.&#x00a0;</p>
            <p> </p>
            <p> While there might be more sophisticated ways of approaching the task (e.g. the Portal Project) than using parameter inputs or dealing with dynamic analysis contexts via static templates (i.e. data could be in long or wide formats and the user has to specify whether the data is long or wide instead of this being handled automatically), the strength of a simpler approach is that potential errors should be more easily detectable and resolvable by more novice R users, and the pipeline can be easily modified or expanded to other projects.</p>
            <p> </p>
            <p> Although the full computing environment for reproducing and operating the pipeline is not provided, the software environment is provided utilising the 
                <italic>renv</italic>&#x00a0;R package, which is supported by RStudio / Posit, and is a well-known, documented and supported tool. This is a good choice given the target user base of the software are mostly undertaking data synthesis manually with spreadsheets. Although providing the full computing environment through a tool like docker/rocker might enable greater reproducibility and longevity / stability, it would not be approachable to the target user.</p>
            <p> </p>
            <p> The software can be downloaded from GitHub, and the user is expected to execute the software using the RStudio IDE, which is also an excellent choice given its popularity, and approachability for new and experienced R users alike.</p>
            <p> </p>
            <p> Software documentation mainly comprises the manuscript under review, as well as the associated README and some markdown files embedded in subdirectories, which gives a neat overview of the purpose and contents of the repository. It's always nice to see a pretty HEX sticker too.</p>
            <p> </p>
            <p> I have not had sufficient time to properly play with and test the software / code in detail as I would prefer (
                <bold>Ivimey-Cook et al. 2023</bold>). But I have briefly explored the main code script and associated files, such as the parameter files, so the majority of my review comprises critiques about the manuscript, and some minimal aspects of implementation.</p>
            <p> </p>
            <p> The manuscript was well-calibrated in the level of detail and explanation for the software's target user, and included a good level of detail, in terms of set up / implementation, as well as some nice design choices that facilitate use by less familiar R users. e.g. the choice to include an excel spreadsheet version of the parameter specification files and associated data dictionary sheet, and the distribution of the software in a 'just-ran' state, so that users can get a handle on how the outputs relate to inputs and specification settings. I also particularly liked Figure 1, which gives a great visual overview of the data pipeline.</p>
            <p> </p>
            <p> 
                <bold>Major Issues &amp; Improvements</bold>
            </p>
            <p> </p>
            <p> A major issue with the manuscript was the small but frequent grammatical errors and stylistic expression that hampered clarity of meaning. Secondly, the rationale for the software could be more tightly synthesized with relevant literature, which, if addressed, would help in highlighting the utility and merits of the software. Similarly, the discussion could reference related approaches / literature to contextualise the intended application and contrast alternative approaches to this problem. Minor issues include the need for further clarifying some aspects of the pipeline in appropriate logical sequence, and some further explanation or provision of further resources / user guides for the more complex elements of the software. These are addressed in detail below.</p>
            <p> </p>
            <p> 
                <italic>Rationale</italic>
            </p>
            <p> </p>
            <p> - In reference to your sentence "By doing so, they might leave out potential data that would have been instrumental in answering their complex scientific questions, ultimately causing a loss in statistical power," a&#x00a0;loss of statistical power is certainly a big problem caused by not utilising all available data. I think this problem is actually more serious/nuanced than reduced statistical power. For example, another problem is that including or excluding particular subsets of the data may have a large influence on the results and therefore findings (check out some many-analyst style studies for examples of how analytic decisions may have large bearings on the research findings, e.g. Gould et al. in press -- disclaimer, I am first author on this paper, there is no pressure to cite, but we did find that data subsetting could swing numerical results wildly at the boundaries of expected values).&#x00a0;</p>
            <p> - Secondly, this ultimately is also a form of 'research waste.' You could also consider citing Buxton et al. (2021) Ref 2, or Purgar et al. (2022) Ref 3 here.</p>
            <p> - Considering these additional problems related to a lack of appropriate infrastructure for leveraging available data would further underscore the need for the <italic>BEpipeR</italic> package.</p>
            <p> - Taking a positive perspective, improving data usage within- and among- consortiums, through utilising tools similar to <italic>BEpipeR</italic> would contribute to the task of data / evidence synthesis, and potentially tackling broader empirical research questions, around generality, for example.</p>
            <p> - Tools, such as <italic>BEpipeR</italic>, support good data stewardship and data management with a long-term vision which ultimately support scientific discovery and progress (Wilkinson et al. 2016) Ref 4.</p>
            <p> </p>
            <p> 
                <italic>Other Related Literature / Applications</italic>
            </p>
            <p> </p>
            <p> - Portal Project and associated package (addresses the same problem as BEpipeR, but for the Portal Project): 
                <ext-link ext-link-type="uri" xlink:href="https://portal.weecology.org/">https://portal.weecology.org</ext-link> 
                <ext-link ext-link-type="uri" xlink:href="https://github.com/weecology/portalr">https://github.com/weecology/portalr</ext-link> (Christensen et al. 2019) Ref 1</p>
            <p> - "Regularly updated data" (Yenni et al. 2019) Ref 5-- a potential use-case for BEpipeR as new monitoring data is collected over time.</p>
            <p> - "Near Term forecasting" (White et al. 2019) Ref 6-- another potential use-case for BEpipeR that could be incorporated into automated pipelines when the software matures into the future.</p>
            <p> </p>
            <p> 
                <italic>Use-case demonstration</italic>
            </p>
            <p> </p>
            <p> - I found the textual summary of the use-case was rather onerous to read. This could be better presented as a diagram / schematic, perhaps with snapshots of the data, and a summary of the user-settings at various phases of the pipeline.</p>
            <p> </p>
            <p> 
                <bold>Minor Issues</bold>
            </p>
            <p> </p>
            <p> - 
                <italic>abstract</italic>:</p>
            <p> &#x00a0;&#x00a0; &#x00a0;- 
                <italic>Background</italic>: I think a clause qualifying what Biodiversity Exploratories is ("Large-scale long-term environmental research frameworks") in the abstract would be handy, perhaps at the end of the final sentence in the 'Background' component of the abstract.</p>
            <p> - 
                <italic>Implementation</italic>:</p>
            <p> &#x00a0;&#x00a0; &#x00a0;- It was nice to see an explanation of the setup that includes an explanation of `renv`, I suggest providing a link to the user-guide (
                <ext-link ext-link-type="uri" xlink:href="https://docs.posit.co/ide/user/ide/guide/environments/r/renv.html">https://docs.posit.co/ide/user/ide/guide/environments/r/renv.html</ext-link>), given the target audience are not experienced R users. I've personally experienced a bit of difficulty with `renv` at times, even though I consider myself to be at the pointy end of things.</p>
            <p> &#x00a0;&#x00a0; &#x00a0;- "For up-to-date set-up instructions, users are referred to the pipeline&#x2019;s GitHub presence" - I suggest qualifying the exact location. When I went to find where this information might be, I could only find the same content stored as 
                <ext-link ext-link-type="uri" xlink:href="https://github.com/marcelglueck/BEpipeR/blob/main/setup_guide.md">a markdown file</ext-link>.</p>
            <p> &#x00a0;&#x00a0; &#x00a0;- "As the Excel versions of these files support users in providing processing information [...]" - the fact that there are .xlsx versions of the same .csv files hasn't yet been introduced in this paragraph. I suggest stating so clearly and rewording. I had to go to the repository to be sure of the intended meaning here.</p>
            <p> - 
                <italic>Table 2:&#x00a0;</italic>
            </p>
            <p> &#x00a0;&#x00a0; &#x00a0;- Move "Source" row above "Processing" since the content of "Processing" is the product of "Source": "Files copied here from &#x2018;Source&#x2019; for processing through BEpipeR. This directory is expunged at the start of each pipeline run."</p>
            <p> &#x00a0;&#x00a0; &#x00a0;- "The R programming language script of the BEpipeR pipeline": replace "of the" with, "that executes", or similar.&#x00a0;</p>
            <p> - 
                <italic>Data preprocessing:</italic>
            </p>
            <p> &#x00a0;&#x00a0; &#x00a0;- "R session as &#x2018;datasets_table&#x2019;" should be "global environment for the active R session as the object &#x2018;datasets_table&#x2019;".</p>
            <p> &#x00a0;&#x00a0; &#x00a0;- In my experience reshaping data between long and wide formats can sometimes be non-trivial. I would suggest giving users a warning here to check the outputs are as expected after reshaping. An example of the data reshaping inputs and outputs might also help exemplify this process a little further. Perhaps as a vignette in the repository or in a text-box. I found the reshaping a little hard to mentally visualize from the text description.&#x00a0;</p>
            <p> &#x00a0;&#x00a0; &#x00a0;- The 'standardization by variable' paragraph was also a little tricky to follow. It's stated that "BEpipeR allows all numeric variables of a data set to be normalized by information provided in a user-specified column of that data set", what is the specific normalization procedure used? This should be mentioned so that the user can understand the calculation fully. I assume from the previous sentence in the paragraph mentioning sampling effort, and from reading below that the standardization approach is rarefaction?</p>
            <p> - 
                <italic>Quality Checks</italic>
            </p>
            <p> &#x00a0;&#x00a0; &#x00a0;- It seems that outlier checks might be performed once (maybe twice?) by the user, and that after running the pipeline once to flag potential outliers, the user can then remove outliers, and then re-run the pipeline to completion. After doing so, should the user then turn off the quality checking? Figure 1 gives the impression of a somewhat linear pipeline, but perhaps the process is more iterative depending on actions taken after QC. A little more explanation of the overall workflow here would be handy.</p>
            <p> - 
                <italic>Diversity calculations</italic>
            </p>
            <p> &#x00a0;&#x00a0; &#x00a0;- Which exact &#x00a0;procedure is used to "calculate alpha diversity indices"? This isn't mentioned until the very end of the paragraph, which follows very detailed treatment of the contentious nature of rarefaction.&#x00a0;</p>
            <p> - 
                <italic>Post-processing</italic>
            </p>
            <p> &#x00a0;&#x00a0; &#x00a0;- "BEpipeR left-joins all available data to the plot IDs template constructed upstream." The target user is not going to know what a left-join is, most likely. Perhaps you could provide a link to R4ds, which explains this procedure further: https://r4ds.hadley.nz/joins . https://r4ds.had.co.nz/relational-data.html#outer-join has a really nice diagram depicting and explaining this.</p>
            <p> - 
                <italic>Conclusion</italic>
            </p>
            <p> &#x00a0;&#x00a0; &#x00a0;- "People interested can contribute both conceptually, by providing suggestions for future implementations, and preferably, by coding." This needs a little further explanation, i.e. people can contribute by making pull-requests to the package's code-base on GitHub.</p>
            <p> </p>
            <p> 
                <italic>Referencing</italic>
            </p>
            <p> </p>
            <p> I don't think Baker (2016) is the right reference for the statement "Arguably, BEpipeR has the potential to generate large composite data sets in a highly reproducible fashion." Either you meant to cite Baker (2017), which is also in your reference list, or you should cite something more focussed on the intersection of data pipelines / infrastructure and reproducibility.</p>
            <p> </p>
            <p> 
                <italic>Software Citation Style</italic>
            </p>
            <p> </p>
            <p> Instead of having a very long sentence chewing up a whole paragraph containing all of the software used in your package, consider using a table of package citations, e.g. following the citation report approach taken by the 
                <italic>grateful</italic> package, 
                <ext-link ext-link-type="uri" xlink:href="https://pakillo.github.io/grateful/index.html#using-grateful-within-rmarkdown">summarising packages within a table</ext-link>.</p>
            <p> </p>
            <p> 
                <italic>Expression</italic>
            </p>
            <p> </p>
            <p> - At times the writing was hyperbolic and not so succinct, e.g. "fuelled by" (perhaps replace with, "underpinned by") and "unmatched standing expertise", "To conclude, even though this project might be facing substantial challenges, it is the Exploratories&#x2019; large base of researchers and scientific staff that has the potential to render this endeavour a success." I think an additional edit for succinctness and clarity of expression is needed.</p>
            <p> - Overuse of demonstrative particles, such as 'this', 'that', 'these' combined with nominalization at times hampered clarity and slowed my understanding, e.g. of nominalisation: "that allow for a straightforward and less time-consuming incorporation into their workflows". Suggest switching to more active language. This will also help with succinctness.</p>
            <p> - "executing the pipeline productively" it's not clear what 'productively' means here. Do you mean 'in production'?</p>
            <p> - "Additionally, in the following, we provide an", replace with "Below we provide an"</p>
            <p> - Shift "to guide users in familiarizing themselves with the pipeline" after "an in-depth description of the workflow".&#x00a0;</p>
            <p> </p>
            <p> 
                <italic>Grammatical, spelling, typographical errors</italic>
            </p>
            <p> </p>
            <p> - "data subsettings": 'subsetting' is plural.</p>
            <p> - "data substitution through exact and pattern-based approaches", do you mean 'subsetting'?</p>
            <p> - "resolving species aggregates issues", replace "species aggregates" with 'species aggregation' or similar.</p>
            <p> - First sentence of last paragraph in introduction is extremely long, and the clause following the semi-colon seems incomplete grammatically.</p>
            <p>Are the conclusions about the tool and its performance adequately supported by the findings presented in the article?</p>
            <p>Yes</p>
            <p>Is the rationale for developing the new software tool clearly explained?</p>
            <p>Yes</p>
            <p>Is the description of the software tool technically sound?</p>
            <p>Yes</p>
            <p>Are sufficient details of the code, methods and analysis (if applicable) provided to allow replication of the software development and its use by others?</p>
            <p>Partly</p>
            <p>Is sufficient information provided to allow interpretation of the expected output datasets and any results generated using the tool?</p>
            <p>Partly</p>
            <p>Reviewer Expertise:</p>
            <p>Applied ecology / ecological modelling, research software development for data analysis pipelines.</p>
            <p>I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard, however I have significant reservations, as outlined above.</p>
        </body>
        <back>
            <ref-list>
                <title>References</title>
                <ref id="rep-ref-341491-1">
                    <label>1</label>
                    <mixed-citation publication-type="journal">
                        <person-group person-group-type="author"/>:
                        <article-title>portalr: an R package for summarizing and using the Portal Project Data</article-title>.
                        <source>
                            <italic>Journal of Open Source Software</italic>
                        </source>.<year>2019</year>;<volume>4</volume>(<issue>33</issue>) :
                        <elocation-id>10.21105/joss.01098</elocation-id>
                        <pub-id pub-id-type="doi">10.21105/joss.01098</pub-id>
                    </mixed-citation>
                </ref>
                <ref id="rep-ref-341491-2">
                    <label>2</label>
                    <mixed-citation publication-type="journal">
                        <person-group person-group-type="author"/>:
                        <article-title>Avoiding wasted research resources in conservation science</article-title>.
                        <source>
                            <italic>Conservation Science and Practice</italic>
                        </source>.<year>2021</year>;<volume>3</volume>(<issue>2</issue>) :
                        <elocation-id>10.1111/csp2.329</elocation-id>
                        <pub-id pub-id-type="doi">10.1111/csp2.329</pub-id>
                    </mixed-citation>
                </ref>
                <ref id="rep-ref-341491-3">
                    <label>3</label>
                    <mixed-citation publication-type="journal">
                        <person-group person-group-type="author"/>:
                        <article-title>Quantifying research waste in ecology.</article-title>
                        <source>
                            <italic>Nat Ecol Evol</italic>
                        </source>.<year>2022</year>;<volume>6</volume>(<issue>9</issue>) :
                        <elocation-id>10.1038/s41559-022-01820-0</elocation-id>
                        <fpage>1390</fpage>-<lpage>1397</lpage>
                        <pub-id pub-id-type="pmid">35864230</pub-id>
                        <pub-id pub-id-type="doi">10.1038/s41559-022-01820-0</pub-id>
                    </mixed-citation>
                </ref>
                <ref id="rep-ref-341491-4">
                    <label>4</label>
                    <mixed-citation publication-type="journal">
                        <person-group person-group-type="author"/>:
                        <article-title>The FAIR Guiding Principles for scientific data management and stewardship.</article-title>
                        <source>
                            <italic>Sci Data</italic>
                        </source>.<year>2016</year>;<volume>3</volume>:
                        <elocation-id>10.1038/sdata.2016.18</elocation-id>
                        <fpage>160018</fpage>
                        <pub-id pub-id-type="pmid">26978244</pub-id>
                        <pub-id pub-id-type="doi">10.1038/sdata.2016.18</pub-id>
                    </mixed-citation>
                </ref>
                <ref id="rep-ref-341491-5">
                    <label>5</label>
                    <mixed-citation publication-type="journal">
                        <person-group person-group-type="author"/>:
                        <article-title>Developing a modern data workflow for regularly updated data.</article-title>
                        <source>
                            <italic>PLoS Biol</italic>
                        </source>.<year>2019</year>;<volume>17</volume>(<issue>1</issue>) :
                        <elocation-id>10.1371/journal.pbio.3000125</elocation-id>
                        <fpage>e3000125</fpage>
                        <pub-id pub-id-type="pmid">30695030</pub-id>
                        <pub-id pub-id-type="doi">10.1371/journal.pbio.3000125</pub-id>
                    </mixed-citation>
                </ref>
                <ref id="rep-ref-341491-6">
                    <label>6</label>
                    <mixed-citation publication-type="journal">
                        <person-group person-group-type="author"/>:
                        <article-title>Developing an automated iterative near&#x2010;term forecasting system for an ecological study</article-title>.
                        <source>
                            <italic>Methods in Ecology and Evolution</italic>
                        </source>.<year>2019</year>;<volume>10</volume>(<issue>3</issue>) :
                        <elocation-id>10.1111/2041-210X.13104</elocation-id>
                        <fpage>332</fpage>-<lpage>344</lpage>
                        <pub-id pub-id-type="doi">10.1111/2041-210X.13104</pub-id>
                    </mixed-citation>
                </ref>
            </ref-list>
        </back>
    </sub-article>
</article>
