<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.2 20190208//EN" "http://jats.nlm.nih.gov/publishing/1.2/JATS-journalpublishing1.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="1.2" xml:lang="en">
    <front>
        <journal-meta>
            <journal-id journal-id-type="pmc">F1000Research</journal-id>
            <journal-title-group>
                <journal-title>F1000Research</journal-title>
            </journal-title-group>
            <issn pub-type="epub">2046-1402</issn>
            <publisher>
                <publisher-name>F1000 Research Limited</publisher-name>
                <publisher-loc>London, UK</publisher-loc>
            </publisher>
        </journal-meta>
        <article-meta>
            <article-id pub-id-type="doi">10.12688/f1000research.136097.1</article-id>
            <article-categories>
                <subj-group subj-group-type="heading">
                    <subject>Research Article</subject>
                </subj-group>
                <subj-group>
                    <subject>Articles</subject>
                </subj-group>
            </article-categories>
            <title-group>
                <article-title>Graph neural network-based anomaly detection for river network systems</article-title>
                <fn-group content-type="pub-status">
                    <fn>
                        <p>[version 1; peer review: 2 approved with reservations]</p>
                    </fn>
                </fn-group>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author" corresp="yes">
                    <name>
                        <surname>Buchhorn</surname>
                        <given-names>Katie</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Formal Analysis</role>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <role content-type="http://credit.niso.org/">Software</role>
                    <role content-type="http://credit.niso.org/">Validation</role>
                    <role content-type="http://credit.niso.org/">Visualization</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Original Draft Preparation</role>
                    <uri content-type="orcid">https://orcid.org/0000-0001-6079-1615</uri>
                    <xref ref-type="corresp" rid="c1">a</xref>
                    <xref ref-type="aff" rid="a1">1</xref>
                    <xref ref-type="aff" rid="a2">2</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Santos-Fernandez</surname>
                        <given-names>Edgar</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Data Curation</role>
                    <role content-type="http://credit.niso.org/">Visualization</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a1">1</xref>
                    <xref ref-type="aff" rid="a2">2</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Mengersen</surname>
                        <given-names>Kerrie</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Conceptualization</role>
                    <role content-type="http://credit.niso.org/">Funding Acquisition</role>
                    <role content-type="http://credit.niso.org/">Supervision</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a1">1</xref>
                    <xref ref-type="aff" rid="a2">2</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Salomone</surname>
                        <given-names>Robert</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <role content-type="http://credit.niso.org/">Supervision</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a2">2</xref>
                    <xref ref-type="aff" rid="a3">3</xref>
                </contrib>
                <aff id="a1">
                    <label>1</label>School of Mathematical Sciences, Queensland University of Technology, Brisbane, Queensland, Australia</aff>
                <aff id="a2">
                    <label>2</label>Centre for Data Science, Queensland University of Technology, Brisbane, Queensland, Australia</aff>
                <aff id="a3">
                    <label>3</label>School of Computer Science, Queensland University of Technology, Brisbane, Queensland, Australia</aff>
            </contrib-group>
            <author-notes>
                <corresp id="c1">
                    <label>a</label>
                    <email xlink:href="mailto:k.buchhorn@qut.edu.au">k.buchhorn@qut.edu.au</email>
                </corresp>
                <fn fn-type="conflict">
                    <p>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>16</day>
                <month>8</month>
                <year>2023</year>
            </pub-date>
            <pub-date pub-type="collection">
                <year>2023</year>
            </pub-date>
            <volume>12</volume>
            <elocation-id>991</elocation-id>
            <history>
                <date date-type="accepted">
                    <day>13</day>
                    <month>7</month>
                    <year>2023</year>
                </date>
            </history>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2023 Buchhorn K et al.</copyright-statement>
                <copyright-year>2023</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access article distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <self-uri content-type="pdf" xlink:href="https://f1000research.com/articles/12-991/pdf"/>
            <abstract>
                <p>
                    <bold>Background:</bold> Water is the lifeblood of river networks, and its quality plays a crucial role in sustaining both aquatic ecosystems and human societies. Real-time monitoring of water quality is increasingly reliant on in-situ sensor technology.</p>
                <p>Anomaly detection is crucial for identifying erroneous patterns in sensor data, but can be a challenging task due to the complexity and variability of the data, even under typical conditions. This paper presents a solution to the challenging task of anomaly detection for river network sensor data, which is essential for accurate and continuous monitoring.</p>
                <p>
                    <bold>Methods:</bold> We use a graph neural network model, the recently proposed Graph Deviation Network (GDN), which employs graph attention-based forecasting to capture the complex spatio-temporal relationships between sensors.</p>
                <p>We propose an alternate anomaly threshold criterion for the model, GDN+, based on the learned graph. To evaluate the model&#x2019;s efficacy, we introduce new benchmarking simulation experiments with highly sophisticated dependency structures and subsequence anomalies of various types. We also introduce software called gnnad.</p>
                <p>
                    <bold>Results:</bold> We further examine the strengths and weaknesses of this baseline approach, GDN, in comparison to other benchmarking methods on complex real-world river network data.</p>
                <p>
                    <bold>Conclusions:</bold> Findings suggest that GDN+ outperforms the baseline approach in high-dimensional data, while also providing improved interpretability.</p>
            </abstract>
            <kwd-group kwd-group-type="author">
                <kwd>Anomaly Detection</kwd>
                <kwd>Graph Deviation Network</kwd>
                <kwd>Graph Neural Network</kwd>
                <kwd>Multivariate Time Series</kwd>
                <kwd>Graph Attention Forecasting</kwd>
                <kwd>Spatio-temporal Data</kwd>
                <kwd>Complex Systems</kwd>
            </kwd-group>
            <funding-group>
                <award-group id="fund-1" xlink:href="http://dx.doi.org/10.13039/501100000923">
                    <funding-source>Australian Research Council</funding-source>
                    <award-id>LP180101151</award-id>
                </award-group>
                <funding-statement>This work was supported by the Australian Research Council (ARC) Linkage Project (LP180101151) titled &#x201c;Revolutionising water-quality monitoring in the information age&#x201d;. Case study data were provided by the Department of Environment and Science, Queensland, and are available as part of the Python package gnnad.</funding-statement>
                <funding-statement>
                    <italic>The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript.</italic>
                </funding-statement>
            </funding-group>
        </article-meta>
    </front>
    <body>
        <sec id="sec1" sec-type="intro">
            <title>Introduction</title>
            <p>River network systems play a vital role as freshwater habitats for aquatic life, and as support for terrestrial ecosystems in riparian zones, but are particularly sensitive to the anthropogenic impacts of climate change, water pollution and over-exploitation, among other factors. As a United Nations Sustainable Development Goal,
                <sup>
                    <xref ref-type="bibr" rid="ref1">1</xref>
                </sup> water quality is a major environmental concern worldwide. The use of 
                <italic toggle="yes">in-situ</italic> [
                <xref ref-type="fn" rid="fn1">1</xref>] sensors for data collection on river networks is increasingly prevalent,
                <sup>
                    <xref ref-type="bibr" rid="ref2">2</xref>
                </sup>
                <sup>,</sup>
                <sup>
                    <xref ref-type="bibr" rid="ref3">3</xref>
                </sup> generating large amounts of data that allow for the identification of fine-scale spatial and temporal patterns, trends, and extremes, as well as potential sources of pollutants and their downstream impacts. However, such sensors are susceptible to technical errors relating to the equipment, herein defined as 
                <italic toggle="yes">anomalies</italic>, for example due to miscalibration, biofouling, electrical interference and battery failure. In contrast, extreme 
                <italic toggle="yes">events</italic> in rivers occur as a result of heavy rain and floods. Technical anomalies must be identified before the data are considered for further analysis, as they can introduce bias in model parameters and affect the validity of statistical inferences, confounding the identification of true changes in water variables. Trustworthy data are needed to produce reliable and accurate assessments of water quality, for enhanced environmental monitoring, and for guiding management decisions in the prioritisation of ecosystem health.</p>
            <p>Anomaly detection in river networks is challenging due to the highly dynamic nature of river water even under typical conditions,
                <sup>
                    <xref ref-type="bibr" rid="ref4">4</xref>
                </sup> as well as the complex spatial relationships between sensors. The unique spatial relationships between neighbouring sensors on a river network are characterised by a branching network topology with flow direction and connectivity, embedded within the 3-D terrestrial landscape. Common anomalies from data obtained from 
                <italic toggle="yes">in-situ</italic> sensors are generally characterised by multiple consecutive observations (
                <italic toggle="yes">subsequence</italic> or persistent
                <sup>
                    <xref ref-type="bibr" rid="ref5">5</xref>
                </sup>), including sensor drift and periods of unusually high or low variability, which may indicate the necessity for sensor maintenance or calibration.
                <sup>
                    <xref ref-type="bibr" rid="ref6">6</xref>
                </sup> Such anomalies are difficult to detect and often associated with high false negative rates.
                <sup>
                    <xref ref-type="bibr" rid="ref7">7</xref>
                </sup>
            </p>
            <p>Earlier statistical studies have focused on developing autocovariance models based on within-river relationships to capture the unique spatial characteristics of rivers.
                <sup>
                    <xref ref-type="bibr" rid="ref8">8</xref>
                </sup> Although these methods are adaptable to different climate zones, and have recently been extended to take temporal dependencies into account,
                <sup>
                    <xref ref-type="bibr" rid="ref9">9</xref>
                </sup> data sets generated by 
                <italic toggle="yes">in-situ</italic> sensors still pose significant computational challenges with such prediction methods due to the sheer volume of data.
                <sup>
                    <xref ref-type="bibr" rid="ref10">10</xref>
                </sup> Autocovariance matrices must be inverted when fitting spatio-temporal models and making predictions, and the distances between sites must be known. Previous work
                <sup>
                    <xref ref-type="bibr" rid="ref11">11</xref>
                </sup> aimed to detect drift and high-variability anomalies in water quality variables by studying a range of neural networks calibrated using a Bayesian multi-objective optimisation procedure. However, the study was limited to analysing univariate time series data, and the supervised methods required a significant amount of labelled data for training, which are not always available.</p>
            <p>There are few unsupervised anomaly detection methods for subsequence anomalies (of variable length), and even fewer for multivariate time series anomaly detection.
                <sup>
                    <xref ref-type="bibr" rid="ref5">5</xref>
                </sup> One such method uses dynamic clustering on learned segmented windows to identify global and local anomalies.
                <sup>
                    <xref ref-type="bibr" rid="ref12">12</xref>
                </sup> However, this algorithm requires the time series to be well-aligned, and is not suitable for the lagged temporal relationships observed with river flow. Another method to detect variable-length subsequence anomalies in multivariate time series data uses dimensionality reduction to construct a one-dimensional feature, to represent the density of a local region in the recurrence representation, indicating the recurrence of patterns obtained by a sliding window.
                <sup>
                    <xref ref-type="bibr" rid="ref13">13</xref>
                </sup> A similarity measure is used to classify subsequences as either non-anomalous or anomalous. Only two summary statistics were used in this similarity measure and the results were limited to a low-dimensional simulation study. In contrast, the technique introduced by Ref. 
                <xref ref-type="bibr" rid="ref14">14</xref>, DeepAnT, uses a deep convolutional neural network (CNN) to predict one step ahead. This approach uses Euclidean distance of the forecast errors as the anomaly score. However, an anomaly threshold must be provided.</p>
            <p>Despite the above initial advances, challenges still remain in detecting persistent variable-length anomalies within high-dimensional data exhibiting noisy and complex spatial and temporal dependencies. With the aim of addressing such challenges, we explore the application of the recently-proposed Graph Deviation Network (GDN),
                <sup>
                    <xref ref-type="bibr" rid="ref15">15</xref>
                </sup> and explore refinements with respect to anomaly scoring that address the needs of environmental monitoring. The GDN approach
                <sup>
                    <xref ref-type="bibr" rid="ref15">15</xref>
                </sup> is a state-of-the-art model that uses sensor embeddings to capture inter-sensor relationships as a learned graph, and employs graph attention-based forecasting to predict future sensor behaviour. Anomalies are flagged when the error scores are above a calculated threshold value. By learning the interdependencies among variables and predicting based on the typical patterns of the system in a semi-supervised manner, this approach is able to detect deviations when the expected spatial dependencies are disrupted. As such, GDN offers the ability to detect even the small-deviation anomalies generally overlooked by other distance-based and density-based anomaly detection methods for time series,
                <sup>
                    <xref ref-type="bibr" rid="ref16">16</xref>
                </sup>
                <sup>,</sup>
                <sup>
                    <xref ref-type="bibr" rid="ref17">17</xref>
                </sup> while offering robustness to lagged variable relationships. Unlike the commonly used statistical methods that explicitly model covariance as a function of distance, GDN is flexible in capturing complex variable relationships independent of distance. GDN is also a semi-supervised approach, eliminating the need to label large amounts of data, and offers a computationally efficient solution to handle the ever-increasing supply of sensor data. Despite the existing suite of methods developed for anomaly detection, only a limited number of corresponding software packages are available to practitioners. In summary, we have identified the following gaps in the current literature and research on this topic:
                <list list-type="order">
                    <list-item>
                        <label>1.</label>
                        <p>An urgent need exists for a flexible approach that can effectively capture complex spatial relationships in river networks without the specification of an autocovariance model, and that can learn from limited labelled data in a computationally efficient manner.</p>
                    </list-item>
                    <list-item>
                        <label>2.</label>
                        <p>Lack of data and anomaly generation schemes on which to benchmark methods, that exhibit complex spatial and temporal dependencies, as observed across river networks.</p>
                    </list-item>
                    <list-item>
                        <label>3.</label>
                        <p>Lack of open-source software for anomaly detection, which hinders the accessibility and reproducibility of research in this field, and limits the ability for individuals and organisations to implement effective anomaly detection strategies.</p>
                    </list-item>
                </list>
            </p>
            <p>Our work makes four primary contributions to the field:
                <list list-type="order">
                    <list-item>
                        <label>1.</label>
                        <p>An improvement of the GDN approach via the threshold calculation based on the learned graph is presented, and shown to detect anomalies more accurately than GDN while improving the ability to locate anomalies across a network.</p>
                    </list-item>
                    <list-item>
                        <label>2.</label>
                        <p>Methods are provided for simulating new benchmark data with highly sophisticated spatio-temporal structures, contaminated with various types of persistent anomalies.</p>
                    </list-item>
                    <list-item>
                        <label>3.</label>
                        <p>Numerical studies are conducted, featuring a suite of benchmarking data sets, as well as real-world river network data, to explore the strengths and limitations of GDN (and its variants) in increasingly challenging settings.</p>
                    </list-item>
                    <list-item>
                        <label>4.</label>
                        <p>User-friendly, free open-source software for the GDN/GDN+ approach is made available on the Python Package Index (PyPI) as gnnad (installable via pip), with data and anomaly generation modules, as well as the publication of a novel real-world data set.</p>
                    </list-item>
                </list>
            </p>
            <p>The structure of the remainder of the paper is as follows: The next section details the methods of GDN and the model extension GDN+, and describes the methodology of the simulated data and anomaly generation. In the Results section we present an extensive simulation study on the benchmarking data, as well as a real-world case study. The performance of GDN/GDN+ is assessed against other state-of-the-art anomaly detection models. Further details and example code for the newly-released software are also provided. The paper concludes with a discussion of the findings, and the strengths and weaknesses of the considered models.</p>
        </sec>
        <sec id="sec2" sec-type="methods">
            <title>Methods</title>
            <p>Consider multivariate time series data 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi mathvariant="normal">Y</mml:mi>
                        <mml:mo>=</mml:mo>
                        <mml:mfenced close="]" open="[" separators=",,">
                            <mml:msup>
                                <mml:mi mathvariant="bold-italic">y</mml:mi>
                                <mml:mfenced close=")" open="(">
                                    <mml:mn>1</mml:mn>
                                </mml:mfenced>
                            </mml:msup>
                            <mml:mo>&#x2026;</mml:mo>
                            <mml:msup>
                                <mml:mi mathvariant="bold-italic">y</mml:mi>
                                <mml:mfenced close=")" open="(">
                                    <mml:mi>T</mml:mi>
                                </mml:mfenced>
                            </mml:msup>
                        </mml:mfenced>
                    </mml:math>
                </inline-formula>, obtained from 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi>n</mml:mi>
                    </mml:math>
                </inline-formula> sensors over 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi>T</mml:mi>
                    </mml:math>
                </inline-formula> time ticks. The (univariate) data collected from sensor 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi>i</mml:mi>
                        <mml:mo>=</mml:mo>
                        <mml:mn>1</mml:mn>
                        <mml:mo>,</mml:mo>
                        <mml:mo>&#x2026;</mml:mo>
                        <mml:mo>,</mml:mo>
                        <mml:mi>n</mml:mi>
                    </mml:math>
                </inline-formula> at time 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi>t</mml:mi>
                        <mml:mo>=</mml:mo>
                        <mml:mn>1</mml:mn>
                        <mml:mo>,</mml:mo>
                        <mml:mo>&#x2026;</mml:mo>
                        <mml:mo>,</mml:mo>
                        <mml:mi>T</mml:mi>
                    </mml:math>
                </inline-formula> are denoted as 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:msubsup>
                            <mml:mi>y</mml:mi>
                            <mml:mi>i</mml:mi>
                            <mml:mfenced close=")" open="(">
                                <mml:mi>t</mml:mi>
                            </mml:mfenced>
                        </mml:msubsup>
                    </mml:math>
                </inline-formula>. Following the standard semi-supervised anomaly detection approach,
                <sup>
                    <xref ref-type="bibr" rid="ref18">18</xref>
                </sup>
                <sup>,</sup>
                <sup>
                    <xref ref-type="bibr" rid="ref19">19</xref>
                </sup> non-anomalous data are used for training, while the test set may contain anomalous data. That is, we aim to learn the sensor behaviour using data obtained under standard operational conditions throughout the training phase and identify anomalous sensor readings during testing, as those which deviate substantially from the learned behaviour. As the algorithm output, each test point 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:msup>
                            <mml:mi mathvariant="bold-italic">y</mml:mi>
                            <mml:mfenced close=")" open="(">
                                <mml:mi>t</mml:mi>
                            </mml:mfenced>
                        </mml:msup>
                    </mml:math>
                </inline-formula> is assigned a binary label, 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi>a</mml:mi>
                        <mml:mfenced close=")" open="(">
                            <mml:mi>t</mml:mi>
                        </mml:mfenced>
                        <mml:mo>&#x2208;</mml:mo>
                        <mml:mfenced close="}" open="{" separators=",">
                            <mml:mn>0</mml:mn>
                            <mml:mn>1</mml:mn>
                        </mml:mfenced>
                    </mml:math>
                </inline-formula>, where 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi>a</mml:mi>
                        <mml:mfenced close=")" open="(">
                            <mml:mi>t</mml:mi>
                        </mml:mfenced>
                        <mml:mo>=</mml:mo>
                        <mml:mn>1</mml:mn>
                    </mml:math>
                </inline-formula> indicates an anomaly at time 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi>t</mml:mi>
                    </mml:math>
                </inline-formula>, anywhere across the full sensor network.</p>
            <p>The GDN approach
                <sup>
                    <xref ref-type="bibr" rid="ref15">15</xref>
                </sup> for anomaly detection is composed of two aspects:
                <list list-type="order">
                    <list-item>
                        <label>1.</label>
                        <p>
                            <bold>Forecasting-based time series model:</bold> a non-linear 
                            <italic toggle="yes">autoregressive</italic> multivariate time series model that involves graph neural networks is trained, and</p>
                    </list-item>
                    <list-item>
                        <label>2.</label>
                        <p>
                            <bold>Threshold-based anomaly detection:</bold> transformations of the individual forecasting errors are used to determine if an anomaly has occurred, if such errors exceed a calculated threshold.</p>
                    </list-item>
                </list>
            </p>
            <p>The above components are described in more detail below.</p>
            <p>
                <bold>Forecasting-based Time Series Model.</bold> To predict 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:msup>
                            <mml:mi mathvariant="bold-italic">y</mml:mi>
                            <mml:mfenced close=")" open="(">
                                <mml:mi>t</mml:mi>
                            </mml:mfenced>
                        </mml:msup>
                    </mml:math>
                </inline-formula>, the model takes as input 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi>w</mml:mi>
                        <mml:mo>&#x2208;</mml:mo>
                        <mml:mi mathvariant="normal">&#x2115;</mml:mi>
                    </mml:math>
                </inline-formula> lags of the multivariate series,
                <disp-formula id="e1">
                    <mml:math display="block">
                        <mml:msup>
                            <mml:mi mathvariant="normal">X</mml:mi>
                            <mml:mfenced close=")" open="(">
                                <mml:mi>t</mml:mi>
                            </mml:mfenced>
                        </mml:msup>
                        <mml:mo>&#x2254;</mml:mo>
                        <mml:mfenced close="]" open="[" separators=",,">
                            <mml:msup>
                                <mml:mi mathvariant="bold-italic">y</mml:mi>
                                <mml:mfenced close=")" open="(">
                                    <mml:mrow>
                                        <mml:mi>t</mml:mi>
                                        <mml:mo>&#x2212;</mml:mo>
                                        <mml:mn>1</mml:mn>
                                    </mml:mrow>
                                </mml:mfenced>
                            </mml:msup>
                            <mml:mo>&#x2026;</mml:mo>
                            <mml:msup>
                                <mml:mi mathvariant="bold-italic">y</mml:mi>
                                <mml:mfenced close=")" open="(">
                                    <mml:mrow>
                                        <mml:mi>t</mml:mi>
                                        <mml:mo>&#x2212;</mml:mo>
                                        <mml:mi>w</mml:mi>
                                    </mml:mrow>
                                </mml:mfenced>
                            </mml:msup>
                        </mml:mfenced>
                        <mml:mo>.</mml:mo>
                    </mml:math>
                </disp-formula>
            </p>
            <p>The 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi>i</mml:mi>
                    </mml:math>
                </inline-formula>-th row (containing sensor 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi>i</mml:mi>
                    </mml:math>
                </inline-formula>&#x2019;s measurements for the previous 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi>w</mml:mi>
                    </mml:math>
                </inline-formula> lags) of the above input matrix is represented by the column vector, 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:msubsup>
                            <mml:mi mathvariant="bold-italic">x</mml:mi>
                            <mml:mi>i</mml:mi>
                            <mml:mfenced close=")" open="(">
                                <mml:mi>t</mml:mi>
                            </mml:mfenced>
                        </mml:msubsup>
                        <mml:mo>=</mml:mo>
                        <mml:mfenced close=")" open="(" separators=",,">
                            <mml:msubsup>
                                <mml:mi>y</mml:mi>
                                <mml:mi>i</mml:mi>
                                <mml:mfenced close=")" open="(">
                                    <mml:mrow>
                                        <mml:mi>t</mml:mi>
                                        <mml:mo>&#x2212;</mml:mo>
                                        <mml:mn>1</mml:mn>
                                    </mml:mrow>
                                </mml:mfenced>
                            </mml:msubsup>
                            <mml:mo>&#x2026;</mml:mo>
                            <mml:msubsup>
                                <mml:mi>y</mml:mi>
                                <mml:mi>i</mml:mi>
                                <mml:mfenced close=")" open="(">
                                    <mml:mrow>
                                        <mml:mi>t</mml:mi>
                                        <mml:mo>&#x2212;</mml:mo>
                                        <mml:mi>w</mml:mi>
                                    </mml:mrow>
                                </mml:mfenced>
                            </mml:msubsup>
                        </mml:mfenced>
                    </mml:math>
                </inline-formula>. Prior to training, the practitioner specifies acceptable 
                <italic toggle="yes">candidate relationships</italic> via the sets 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:msub>
                            <mml:mi mathvariant="script">C</mml:mi>
                            <mml:mn>1</mml:mn>
                        </mml:msub>
                        <mml:mo>,</mml:mo>
                        <mml:mo>&#x2026;</mml:mo>
                        <mml:mo>,</mml:mo>
                        <mml:msub>
                            <mml:mi mathvariant="script">C</mml:mi>
                            <mml:mi>n</mml:mi>
                        </mml:msub>
                    </mml:math>
                </inline-formula>, where each 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:msub>
                            <mml:mi mathvariant="script">C</mml:mi>
                            <mml:mi>i</mml:mi>
                        </mml:msub>
                        <mml:mo>&#x2286;</mml:mo>
                        <mml:mfenced close="}" open="{" separators=",,,">
                            <mml:mn>1</mml:mn>
                            <mml:mn>2</mml:mn>
                            <mml:mo>&#x2026;</mml:mo>
                            <mml:mi>n</mml:mi>
                        </mml:mfenced>
                    </mml:math>
                </inline-formula> and does not contain 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi>i</mml:mi>
                    </mml:math>
                </inline-formula>. These sets specify which nodes may be considered as connected 
                <italic toggle="yes">from</italic> node 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi>i</mml:mi>
                    </mml:math>
                </inline-formula> (noting that the adjacency graph connections are not necessarily symmetric).</p>
            <p>The model implicitly learns a graph structure via training 
                <italic toggle="yes">sensor embedding</italic> parameters 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:msub>
                            <mml:mi mathvariant="bold-italic">v</mml:mi>
                            <mml:mi>i</mml:mi>
                        </mml:msub>
                        <mml:mo>&#x2208;</mml:mo>
                        <mml:msup>
                            <mml:mi mathvariant="normal">&#x211d;</mml:mi>
                            <mml:mi>d</mml:mi>
                        </mml:msup>
                    </mml:math>
                </inline-formula> for 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi>i</mml:mi>
                        <mml:mo>=</mml:mo>
                        <mml:mn>1</mml:mn>
                        <mml:mo>,</mml:mo>
                        <mml:mo>&#x2026;</mml:mo>
                        <mml:mo>,</mml:mo>
                        <mml:mi>n</mml:mi>
                    </mml:math>
                </inline-formula> which are used to construct a graph. The intuition is that the embedding vectors capture the inherent characteristics of each sensor, and that sensors which are &#x201c;similar&#x201d; in terms of the angle between their vector embeddings are considered connected. Formally, the quantity 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:msub>
                            <mml:mi>e</mml:mi>
                            <mml:mi mathvariant="italic">ji</mml:mi>
                        </mml:msub>
                    </mml:math>
                </inline-formula> is defined as the cosine similarity between the vector embeddings of sensors 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi>i</mml:mi>
                    </mml:math>
                </inline-formula> and 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi>j</mml:mi>
                    </mml:math>
                </inline-formula>:
                <disp-formula id="e2">
                    <mml:math display="block">
                        <mml:msub>
                            <mml:mi>e</mml:mi>
                            <mml:mi mathvariant="italic">ji</mml:mi>
                        </mml:msub>
                        <mml:mo>=</mml:mo>
                        <mml:mfrac>
                            <mml:mrow>
                                <mml:msubsup>
                                    <mml:mi mathvariant="bold-italic">v</mml:mi>
                                    <mml:mi>i</mml:mi>
                                    <mml:mi mathvariant="normal">T</mml:mi>
                                </mml:msubsup>
                                <mml:msub>
                                    <mml:mi mathvariant="bold-italic">v</mml:mi>
                                    <mml:mi>j</mml:mi>
                                </mml:msub>
                            </mml:mrow>
                            <mml:mrow>
                                <mml:mo stretchy="true">&#x2016;</mml:mo>
                                <mml:msub>
                                    <mml:mi mathvariant="bold-italic">v</mml:mi>
                                    <mml:mi>i</mml:mi>
                                </mml:msub>
                                <mml:mo stretchy="true">&#x2016;</mml:mo>
                                <mml:mspace width="0.1em"/>
                                <mml:mo stretchy="true">&#x2016;</mml:mo>
                                <mml:msub>
                                    <mml:mi mathvariant="bold-italic">v</mml:mi>
                                    <mml:mi>j</mml:mi>
                                </mml:msub>
                                <mml:mo stretchy="true">&#x2016;</mml:mo>
                            </mml:mrow>
                        </mml:mfrac>
                        <mml:mi mathvariant="double-struck">I</mml:mi>
                        <mml:mfenced close="}" open="{">
                            <mml:mrow>
                                <mml:mi>j</mml:mi>
                                <mml:mo>&#x2208;</mml:mo>
                                <mml:msub>
                                    <mml:mi mathvariant="script">C</mml:mi>
                                    <mml:mi>i</mml:mi>
                                </mml:msub>
                            </mml:mrow>
                        </mml:mfenced>
                        <mml:mo>,</mml:mo>
                        <mml:mspace width="1em"/>
                        <mml:mi>i</mml:mi>
                        <mml:mo>,</mml:mo>
                        <mml:mi>j</mml:mi>
                        <mml:mo>&#x2208;</mml:mo>
                        <mml:mfenced close="}" open="{" separators=",,">
                            <mml:mn>1</mml:mn>
                            <mml:mo>&#x2026;</mml:mo>
                            <mml:mi>n</mml:mi>
                        </mml:mfenced>
                        <mml:mo>,</mml:mo>
                    </mml:math>
                </disp-formula>with 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mo stretchy="true">&#x2016;</mml:mo>
                        <mml:mo>&#x22c5;</mml:mo>
                        <mml:mo stretchy="true">&#x2016;</mml:mo>
                    </mml:math>
                </inline-formula> denoting the Euclidean norm, and indicator function 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi mathvariant="double-struck">I</mml:mi>
                    </mml:math>
                </inline-formula> which equals 1 when node 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi>j</mml:mi>
                    </mml:math>
                </inline-formula> belongs to set 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:msub>
                            <mml:mi mathvariant="script">C</mml:mi>
                            <mml:mi>i</mml:mi>
                        </mml:msub>
                    </mml:math>
                </inline-formula>, and 0 otherwise. Note that the similarity is forced to be zero if a connecting node is not in the permissible candidate set. Next, let 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:msub>
                            <mml:mi>e</mml:mi>
                            <mml:mrow>
                                <mml:mi>j</mml:mi>
                                <mml:mo>,</mml:mo>
                                <mml:mfenced close=")" open="(">
                                    <mml:mi>i</mml:mi>
                                </mml:mfenced>
                            </mml:mrow>
                        </mml:msub>
                    </mml:math>
                </inline-formula> be the 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi>i</mml:mi>
                    </mml:math>
                </inline-formula>-th largest value in 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mfenced close=")" open="(" separators=",,">
                            <mml:msub>
                                <mml:mi>e</mml:mi>
                                <mml:mrow>
                                    <mml:mi>j</mml:mi>
                                    <mml:mn>1</mml:mn>
                                </mml:mrow>
                            </mml:msub>
                            <mml:mo>&#x2026;</mml:mo>
                            <mml:msub>
                                <mml:mi>e</mml:mi>
                                <mml:mi mathvariant="italic">jn</mml:mi>
                            </mml:msub>
                        </mml:mfenced>
                    </mml:math>
                </inline-formula>. A 
                <italic toggle="yes">graph-adjacency matrix</italic> (and in turn a graph itself) is then constructed from the sensor similarities via:
                <disp-formula id="e3">
                    <mml:math display="block">
                        <mml:msub>
                            <mml:mi>A</mml:mi>
                            <mml:mi mathvariant="italic">ji</mml:mi>
                        </mml:msub>
                        <mml:mo>=</mml:mo>
                        <mml:mi mathvariant="double-struck">I</mml:mi>
                        <mml:mfenced close="}" open="{">
                            <mml:mrow>
                                <mml:mfenced close="}" open="{">
                                    <mml:mrow>
                                        <mml:msub>
                                            <mml:mi>e</mml:mi>
                                            <mml:mi mathvariant="italic">ji</mml:mi>
                                        </mml:msub>
                                        <mml:mo>&#x2265;</mml:mo>
                                        <mml:msub>
                                            <mml:mi>e</mml:mi>
                                            <mml:mrow>
                                                <mml:mi>j</mml:mi>
                                                <mml:mo>,</mml:mo>
                                                <mml:mfenced close=")" open="(">
                                                    <mml:mi>K</mml:mi>
                                                </mml:mfenced>
                                            </mml:mrow>
                                        </mml:msub>
                                    </mml:mrow>
                                </mml:mfenced>
                                <mml:mo>&#x222a;</mml:mo>
                                <mml:mfenced close="}" open="{">
                                    <mml:mrow>
                                        <mml:mi>i</mml:mi>
                                        <mml:mo>=</mml:mo>
                                        <mml:mi>j</mml:mi>
                                    </mml:mrow>
                                </mml:mfenced>
                            </mml:mrow>
                        </mml:mfenced>
                        <mml:mo>,</mml:mo>
                    </mml:math>
                </disp-formula>for user-specified 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi>K</mml:mi>
                        <mml:mo>&#x2208;</mml:mo>
                        <mml:mfenced close="}" open="{" separators=",,">
                            <mml:mn>1</mml:mn>
                            <mml:mo>&#x2026;</mml:mo>
                            <mml:mi>n</mml:mi>
                        </mml:mfenced>
                    </mml:math>
                </inline-formula> which determines the maximum number of edges from a node, referred to as the &#x201c;Top-K&#x201d; hyperparameter.</p>
            <p>The above describes how the trainable parameters 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:msubsup>
                            <mml:mfenced close="}" open="{">
                                <mml:msub>
                                    <mml:mi mathvariant="bold-italic">v</mml:mi>
                                    <mml:mi>k</mml:mi>
                                </mml:msub>
                            </mml:mfenced>
                            <mml:mrow>
                                <mml:mi>k</mml:mi>
                                <mml:mo>=</mml:mo>
                                <mml:mn>1</mml:mn>
                            </mml:mrow>
                            <mml:mi>n</mml:mi>
                        </mml:msubsup>
                    </mml:math>
                </inline-formula> yield a graph. Next, the lagged series are fed individually through a shallow 
                <italic toggle="yes">Graph Attention Network</italic>
                <sup>
                    <xref ref-type="bibr" rid="ref20">20</xref>
                </sup> that uses the previously constructed graph. Here, each 
                <italic toggle="yes">node</italic> corresponds to a sensor, and the 
                <italic toggle="yes">node features</italic> for node 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi>i</mml:mi>
                    </mml:math>
                </inline-formula> are the lagged (univariate) time-series values, 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:msubsup>
                            <mml:mi mathvariant="bold-italic">x</mml:mi>
                            <mml:mi>i</mml:mi>
                            <mml:mfenced close=")" open="(">
                                <mml:mi>t</mml:mi>
                            </mml:mfenced>
                        </mml:msubsup>
                        <mml:mo>&#x2208;</mml:mo>
                        <mml:msup>
                            <mml:mi mathvariant="normal">&#x211d;</mml:mi>
                            <mml:mi>w</mml:mi>
                        </mml:msup>
                    </mml:math>
                </inline-formula>. Allow a parameter weight matrix 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi mathvariant="normal">W</mml:mi>
                        <mml:mo>&#x2208;</mml:mo>
                        <mml:msup>
                            <mml:mi mathvariant="normal">&#x211d;</mml:mi>
                            <mml:mrow>
                                <mml:mi>d</mml:mi>
                                <mml:mo>&#x00d7;</mml:mo>
                                <mml:mi>w</mml:mi>
                            </mml:mrow>
                        </mml:msup>
                    </mml:math>
                </inline-formula> to apply a shared linear transform to each node. Then, the output of the network is given by
                <disp-formula id="e4">
                    <mml:math display="block">
                        <mml:msubsup>
                            <mml:mi mathvariant="bold-italic">z</mml:mi>
                            <mml:mi>i</mml:mi>
                            <mml:mfenced close=")" open="(">
                                <mml:mi>t</mml:mi>
                            </mml:mfenced>
                        </mml:msubsup>
                        <mml:mo>=</mml:mo>
                        <mml:mo>max</mml:mo>
                        <mml:mfenced close="}" open="{" separators=",">
                            <mml:mn mathvariant="bold">0</mml:mn>
                            <mml:mfenced close=")" open="(">
                                <mml:mrow>
                                    <mml:munder>
                                        <mml:mo movablelimits="false">&#x2211;</mml:mo>
                                        <mml:mrow>
                                            <mml:mi>j</mml:mi>
                                            <mml:mo>:</mml:mo>
                                            <mml:msub>
                                                <mml:mi>A</mml:mi>
                                                <mml:mi mathvariant="italic">ji</mml:mi>
                                            </mml:msub>
                                            <mml:mo>&gt;</mml:mo>
                                            <mml:mn>0</mml:mn>
                                        </mml:mrow>
                                    </mml:munder>
                                    <mml:msub>
                                        <mml:mi>&#x03b1;</mml:mi>
                                        <mml:mi mathvariant="italic">ij</mml:mi>
                                    </mml:msub>
                                    <mml:mi mathvariant="normal">W</mml:mi>
                                    <mml:msubsup>
                                        <mml:mi mathvariant="bold-italic">x</mml:mi>
                                        <mml:mi>j</mml:mi>
                                        <mml:mfenced close=")" open="(">
                                            <mml:mi>t</mml:mi>
                                        </mml:mfenced>
                                    </mml:msubsup>
                                </mml:mrow>
                            </mml:mfenced>
                        </mml:mfenced>
                        <mml:mo>,</mml:mo>
                    </mml:math>
                </disp-formula>where 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:msubsup>
                            <mml:mi mathvariant="bold-italic">z</mml:mi>
                            <mml:mi>i</mml:mi>
                            <mml:mfenced close=")" open="(">
                                <mml:mi>t</mml:mi>
                            </mml:mfenced>
                        </mml:msubsup>
                    </mml:math>
                </inline-formula> is called the 
                <italic toggle="yes">node representation</italic>, and coefficients 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:msub>
                            <mml:mi>&#x03b1;</mml:mi>
                            <mml:mi mathvariant="italic">ij</mml:mi>
                        </mml:msub>
                    </mml:math>
                </inline-formula> are the attention paid to node 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi>j</mml:mi>
                    </mml:math>
                </inline-formula> when computing the representation for node 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi>i</mml:mi>
                    </mml:math>
                </inline-formula>, with:
                <disp-formula id="e5">
                    <mml:math display="block">
                        <mml:mtable columnalign="left" displaystyle="true">
                            <mml:mtr>
                                <mml:mtd>
                                    <mml:msub>
                                        <mml:mi>&#x03c0;</mml:mi>
                                        <mml:mi mathvariant="italic">ij</mml:mi>
                                    </mml:msub>
                                    <mml:mo>=</mml:mo>
                                    <mml:mtext>LeakyReLU</mml:mtext>
                                    <mml:mfenced close=")" open="(">
                                        <mml:mrow>
                                            <mml:msup>
                                                <mml:mi mathvariant="bold-italic">a</mml:mi>
                                                <mml:mi mathvariant="normal">T</mml:mi>
                                            </mml:msup>
                                            <mml:mfenced close=")" open="(">
                                                <mml:mrow>
                                                    <mml:msub>
                                                        <mml:mi mathvariant="bold-italic">v</mml:mi>
                                                        <mml:mi>i</mml:mi>
                                                    </mml:msub>
                                                    <mml:mo>&#x2295;</mml:mo>
                                                    <mml:mi mathvariant="normal">W</mml:mi>
                                                    <mml:msubsup>
                                                        <mml:mi mathvariant="bold-italic">x</mml:mi>
                                                        <mml:mi>i</mml:mi>
                                                        <mml:mfenced close=")" open="(">
                                                            <mml:mi>t</mml:mi>
                                                        </mml:mfenced>
                                                    </mml:msubsup>
                                                    <mml:mo>+</mml:mo>
                                                    <mml:msub>
                                                        <mml:mi mathvariant="bold-italic">v</mml:mi>
                                                        <mml:mi>j</mml:mi>
                                                    </mml:msub>
                                                    <mml:mo>&#x2295;</mml:mo>
                                                    <mml:mi mathvariant="normal">W</mml:mi>
                                                    <mml:msubsup>
                                                        <mml:mi mathvariant="bold-italic">x</mml:mi>
                                                        <mml:mi>j</mml:mi>
                                                        <mml:mfenced close=")" open="(">
                                                            <mml:mi>t</mml:mi>
                                                        </mml:mfenced>
                                                    </mml:msubsup>
                                                </mml:mrow>
                                            </mml:mfenced>
                                        </mml:mrow>
                                    </mml:mfenced>
                                    <mml:mo>,</mml:mo>
                                </mml:mtd>
                            </mml:mtr>
                            <mml:mtr>
                                <mml:mtd>
                                    <mml:mspace width="1em"/>
                                    <mml:mtext>where</mml:mtext>
                                    <mml:mspace width="0.5em"/>
                                    <mml:msub>
                                        <mml:mi>&#x03b1;</mml:mi>
                                        <mml:mi mathvariant="italic">ij</mml:mi>
                                    </mml:msub>
                                    <mml:mo>=</mml:mo>
                                    <mml:mfrac>
                                        <mml:mrow>
                                            <mml:mo>exp</mml:mo>
                                            <mml:mfenced close=")" open="(">
                                                <mml:msub>
                                                    <mml:mi>&#x03c0;</mml:mi>
                                                    <mml:mi mathvariant="italic">ij</mml:mi>
                                                </mml:msub>
                                            </mml:mfenced>
                                        </mml:mrow>
                                        <mml:mrow>
                                            <mml:munder>
                                                <mml:mo movablelimits="false">&#x2211;</mml:mo>
                                                <mml:mrow>
                                                    <mml:mi>k</mml:mi>
                                                    <mml:mo>:</mml:mo>
                                                    <mml:msub>
                                                        <mml:mi>A</mml:mi>
                                                        <mml:mi mathvariant="italic">ki</mml:mi>
                                                    </mml:msub>
                                                    <mml:mo>&gt;</mml:mo>
                                                    <mml:mn>0</mml:mn>
                                                </mml:mrow>
                                            </mml:munder>
                                            <mml:mo>exp</mml:mo>
                                            <mml:mfenced close=")" open="(">
                                                <mml:msub>
                                                    <mml:mi>&#x03c0;</mml:mi>
                                                    <mml:mi mathvariant="italic">ik</mml:mi>
                                                </mml:msub>
                                            </mml:mfenced>
                                        </mml:mrow>
                                    </mml:mfrac>
                                    <mml:mo>,</mml:mo>
                                </mml:mtd>
                            </mml:mtr>
                        </mml:mtable>
                    </mml:math>
                    <label>(1)</label>
                </disp-formula>with learnable parameters 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi mathvariant="bold-italic">a</mml:mi>
                        <mml:mo>&#x2208;</mml:mo>
                        <mml:msup>
                            <mml:mi mathvariant="normal">&#x211d;</mml:mi>
                            <mml:mrow>
                                <mml:mn>2</mml:mn>
                                <mml:mi>d</mml:mi>
                            </mml:mrow>
                        </mml:msup>
                    </mml:math>
                </inline-formula>, where 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mo>&#x2295;</mml:mo>
                    </mml:math>
                </inline-formula> denotes concatenation, and 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mtext>LeakyReLU</mml:mtext>
                        <mml:mfenced close=")" open="(">
                            <mml:mi mathvariant="bold-italic">x</mml:mi>
                        </mml:mfenced>
                        <mml:mo>&#x2254;</mml:mo>
                        <mml:mo>max</mml:mo>
                        <mml:mfenced close="}" open="{" separators=",">
                            <mml:mrow>
                                <mml:mi>&#x03b4;</mml:mi>
                                <mml:mi mathvariant="bold-italic">x</mml:mi>
                            </mml:mrow>
                            <mml:mi mathvariant="bold-italic">x</mml:mi>
                        </mml:mfenced>
                    </mml:math>
                </inline-formula> for 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mn>0</mml:mn>
                        <mml:mo>&lt;</mml:mo>
                        <mml:mi>&#x03b4;</mml:mi>
                        <mml:mo>&lt;</mml:mo>
                        <mml:mn>1</mml:mn>
                    </mml:math>
                </inline-formula>, with the maximum operation applied elementwise. Note the addition [
                <xref ref-type="fn" rid="fn2">2</xref>] in 
                <xref ref-type="disp-formula" rid="e5">Equation 1</xref> and that 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:msubsup>
                            <mml:mo>&#x2211;</mml:mo>
                            <mml:mrow>
                                <mml:mi>j</mml:mi>
                                <mml:mo>=</mml:mo>
                                <mml:mn>1</mml:mn>
                            </mml:mrow>
                            <mml:mi>n</mml:mi>
                        </mml:msubsup>
                        <mml:msub>
                            <mml:mi>&#x03b1;</mml:mi>
                            <mml:mi mathvariant="italic">ij</mml:mi>
                        </mml:msub>
                        <mml:mo>=</mml:mo>
                        <mml:mn>1</mml:mn>
                    </mml:math>
                </inline-formula>. Intuitively, the above is an automated mechanism to aggregate information from a node itself and neighbouring nodes (whilst simultaneously assigning a weight of how much information to take from each neighbour) to compute a vector representing extracted information about node 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi>i</mml:mi>
                    </mml:math>
                </inline-formula> itself and its neighbours&#x2019; interaction with it. The final model output (prediction) is given by,
                <disp-formula id="e6">
                    <mml:math display="block">
                        <mml:msup>
                            <mml:mover accent="true">
                                <mml:mi mathvariant="bold-italic">y</mml:mi>
                                <mml:mo stretchy="true">&#x0302;</mml:mo>
                            </mml:mover>
                            <mml:mfenced close=")" open="(">
                                <mml:mi>t</mml:mi>
                            </mml:mfenced>
                        </mml:msup>
                        <mml:mo>=</mml:mo>
                        <mml:msub>
                            <mml:mi>f</mml:mi>
                            <mml:mi>&#x03b7;</mml:mi>
                        </mml:msub>
                        <mml:mfenced close=")" open="(">
                            <mml:mfenced close="]" open="[" separators=",,">
                                <mml:mrow>
                                    <mml:msub>
                                        <mml:mi mathvariant="bold-italic">v</mml:mi>
                                        <mml:mn>1</mml:mn>
                                    </mml:msub>
                                    <mml:mo>&#x2299;</mml:mo>
                                    <mml:msubsup>
                                        <mml:mi mathvariant="bold-italic">z</mml:mi>
                                        <mml:mn>1</mml:mn>
                                        <mml:mfenced close=")" open="(">
                                            <mml:mi>t</mml:mi>
                                        </mml:mfenced>
                                    </mml:msubsup>
                                </mml:mrow>
                                <mml:mo>&#x2026;</mml:mo>
                                <mml:mrow>
                                    <mml:msub>
                                        <mml:mi mathvariant="bold-italic">v</mml:mi>
                                        <mml:mi>n</mml:mi>
                                    </mml:msub>
                                    <mml:mo>&#x2299;</mml:mo>
                                    <mml:msubsup>
                                        <mml:mi mathvariant="bold-italic">z</mml:mi>
                                        <mml:mi>n</mml:mi>
                                        <mml:mfenced close=")" open="(">
                                            <mml:mi>t</mml:mi>
                                        </mml:mfenced>
                                    </mml:msubsup>
                                </mml:mrow>
                            </mml:mfenced>
                        </mml:mfenced>
                        <mml:mo>,</mml:mo>
                    </mml:math>
                </disp-formula>where 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:msub>
                            <mml:mi>f</mml:mi>
                            <mml:mi>&#x03b7;</mml:mi>
                        </mml:msub>
                        <mml:mo>:</mml:mo>
                        <mml:msup>
                            <mml:mi mathvariant="normal">&#x211d;</mml:mi>
                            <mml:mrow>
                                <mml:mi>d</mml:mi>
                                <mml:mo>&#x00d7;</mml:mo>
                                <mml:mi>n</mml:mi>
                            </mml:mrow>
                        </mml:msup>
                        <mml:mo>&#x2192;</mml:mo>
                        <mml:msup>
                            <mml:mi mathvariant="normal">&#x211d;</mml:mi>
                            <mml:mi>n</mml:mi>
                        </mml:msup>
                    </mml:math>
                </inline-formula> is a feedforward neural network with parameters 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi mathvariant="bold-italic">&#x03b7;</mml:mi>
                    </mml:math>
                </inline-formula>, and 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mo>&#x2299;</mml:mo>
                    </mml:math>
                </inline-formula> denotes element-wise multiplication. The model is trained by optimizing the parameters 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:msubsup>
                            <mml:mfenced close="}" open="{">
                                <mml:msub>
                                    <mml:mi mathvariant="bold-italic">v</mml:mi>
                                    <mml:mi>i</mml:mi>
                                </mml:msub>
                            </mml:mfenced>
                            <mml:mrow>
                                <mml:mi>i</mml:mi>
                                <mml:mo>=</mml:mo>
                                <mml:mn>1</mml:mn>
                            </mml:mrow>
                            <mml:mi>n</mml:mi>
                        </mml:msubsup>
                    </mml:math>
                </inline-formula>, 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi mathvariant="normal">W</mml:mi>
                    </mml:math>
                </inline-formula>, 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi mathvariant="bold-italic">a</mml:mi>
                    </mml:math>
                </inline-formula>, and 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi mathvariant="bold-italic">&#x03b7;</mml:mi>
                    </mml:math>
                </inline-formula> to minimize the mean squared error loss function
                <disp-formula id="e7">
                    <mml:math display="block">
                        <mml:mi mathvariant="script">L</mml:mi>
                        <mml:mo>=</mml:mo>
                        <mml:mfrac>
                            <mml:mn>1</mml:mn>
                            <mml:mrow>
                                <mml:mi>T</mml:mi>
                                <mml:mo>&#x2212;</mml:mo>
                                <mml:mi>w</mml:mi>
                            </mml:mrow>
                        </mml:mfrac>
                        <mml:munderover>
                            <mml:mo movablelimits="false">&#x2211;</mml:mo>
                            <mml:mrow>
                                <mml:mi>t</mml:mi>
                                <mml:mo>=</mml:mo>
                                <mml:mi>w</mml:mi>
                                <mml:mo>+</mml:mo>
                                <mml:mn>1</mml:mn>
                            </mml:mrow>
                            <mml:mi>T</mml:mi>
                        </mml:munderover>
                        <mml:msup>
                            <mml:mfenced close="&#x2016;" open="&#x2016;">
                                <mml:mrow>
                                    <mml:msup>
                                        <mml:mover accent="true">
                                            <mml:mi mathvariant="bold-italic">y</mml:mi>
                                            <mml:mo stretchy="true">&#x0302;</mml:mo>
                                        </mml:mover>
                                        <mml:mfenced close=")" open="(">
                                            <mml:mi>t</mml:mi>
                                        </mml:mfenced>
                                    </mml:msup>
                                    <mml:mo>&#x2212;</mml:mo>
                                    <mml:msup>
                                        <mml:mi mathvariant="bold-italic">y</mml:mi>
                                        <mml:mfenced close=")" open="(">
                                            <mml:mi>t</mml:mi>
                                        </mml:mfenced>
                                    </mml:msup>
                                </mml:mrow>
                            </mml:mfenced>
                            <mml:mn>2</mml:mn>
                        </mml:msup>
                        <mml:mo>.</mml:mo>
                    </mml:math>
                </disp-formula>
            </p>
            <p>
                <bold>Threshold-based Anomaly Detection.</bold> Given the learned inter-sensor and temporal relationships, we are able to detect anomalies as those which deviate from these interdependencies. An 
                <italic toggle="yes">anomalousness score</italic> is computed for each time point in the test data. For each sensor 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi>i</mml:mi>
                    </mml:math>
                </inline-formula>, we denote the prediction error at time 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi>t</mml:mi>
                    </mml:math>
                </inline-formula> as,
                <disp-formula id="e8">
                    <mml:math display="block">
                        <mml:msub>
                            <mml:mi>&#x03b5;</mml:mi>
                            <mml:mrow>
                                <mml:mi>i</mml:mi>
                                <mml:mo>,</mml:mo>
                                <mml:mi>t</mml:mi>
                            </mml:mrow>
                        </mml:msub>
                        <mml:mo>=</mml:mo>
                        <mml:mo>|</mml:mo>
                        <mml:msubsup>
                            <mml:mi>y</mml:mi>
                            <mml:mi>i</mml:mi>
                            <mml:mfenced close=")" open="(">
                                <mml:mi>t</mml:mi>
                            </mml:mfenced>
                        </mml:msubsup>
                        <mml:mo>&#x2212;</mml:mo>
                        <mml:msubsup>
                            <mml:mover accent="true">
                                <mml:mi>y</mml:mi>
                                <mml:mo stretchy="true">&#x0302;</mml:mo>
                            </mml:mover>
                            <mml:mi>i</mml:mi>
                            <mml:mfenced close=")" open="(">
                                <mml:mi>t</mml:mi>
                            </mml:mfenced>
                        </mml:msubsup>
                        <mml:mo>|</mml:mo>
                        <mml:mo>,</mml:mo>
                    </mml:math>
                </disp-formula>with 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mo>|</mml:mo>
                        <mml:mo>&#x22c5;</mml:mo>
                        <mml:mo>|</mml:mo>
                    </mml:math>
                </inline-formula> denoting the absolute value, and the vector of prediction error for each sensor is, 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:msub>
                            <mml:mi mathvariant="bold-italic">&#x03b5;</mml:mi>
                            <mml:mi>i</mml:mi>
                        </mml:msub>
                        <mml:mo>&#x2208;</mml:mo>
                        <mml:msup>
                            <mml:mi mathvariant="normal">&#x211d;</mml:mi>
                            <mml:mrow>
                                <mml:mi>T</mml:mi>
                                <mml:mo>&#x2212;</mml:mo>
                                <mml:mi>w</mml:mi>
                            </mml:mrow>
                        </mml:msup>
                    </mml:math>
                </inline-formula>. Since the error values of different sensors may vary substantially, we perform a robust normalisation of each sensor&#x2019;s errors to prevent any one sensor from overly dominating the others, that is,
                <disp-formula id="e9">
                    <mml:math display="block">
                        <mml:msub>
                            <mml:mover accent="true">
                                <mml:mi mathvariant="bold-italic">&#x03b5;</mml:mi>
                                <mml:mo stretchy="true">&#x02dc;</mml:mo>
                            </mml:mover>
                            <mml:mi>i</mml:mi>
                        </mml:msub>
                        <mml:mo>=</mml:mo>
                        <mml:mfenced close=")" open="(">
                            <mml:mfrac>
                                <mml:mrow>
                                    <mml:msub>
                                        <mml:mi mathvariant="bold-italic">&#x03b5;</mml:mi>
                                        <mml:mi>i</mml:mi>
                                    </mml:msub>
                                    <mml:mo>&#x2212;</mml:mo>
                                    <mml:mtext>Median</mml:mtext>
                                    <mml:mfenced close=")" open="(">
                                        <mml:msub>
                                            <mml:mi mathvariant="bold-italic">&#x03b5;</mml:mi>
                                            <mml:mi>i</mml:mi>
                                        </mml:msub>
                                    </mml:mfenced>
                                </mml:mrow>
                                <mml:mrow>
                                    <mml:mi>IQR</mml:mi>
                                    <mml:mfenced close=")" open="(">
                                        <mml:msub>
                                            <mml:mi mathvariant="bold-italic">&#x03b5;</mml:mi>
                                            <mml:mi>i</mml:mi>
                                        </mml:msub>
                                    </mml:mfenced>
                                </mml:mrow>
                            </mml:mfrac>
                        </mml:mfenced>
                        <mml:mo>,</mml:mo>
                    </mml:math>
                </disp-formula>where IQR denotes 
                <italic toggle="yes">inter-quartile range.</italic> In the original work by Ref. 
                <xref ref-type="bibr" rid="ref15">15</xref>, a time point 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi>t</mml:mi>
                    </mml:math>
                </inline-formula> is flagged as anomalous if,
                <disp-formula id="e10">
                    <mml:math display="block">
                        <mml:mi>A</mml:mi>
                        <mml:mfenced close=")" open="(">
                            <mml:mi>t</mml:mi>
                        </mml:mfenced>
                        <mml:mo>=</mml:mo>
                        <mml:mi mathvariant="double-struck">I</mml:mi>
                        <mml:mfenced close="}" open="{">
                            <mml:mrow>
                                <mml:munder>
                                    <mml:mo>max</mml:mo>
                                    <mml:mi>i</mml:mi>
                                </mml:munder>
                                <mml:mfenced close=")" open="(">
                                    <mml:msub>
                                        <mml:mover accent="true">
                                            <mml:mi>&#x03b5;</mml:mi>
                                            <mml:mo stretchy="true">&#x02dc;</mml:mo>
                                        </mml:mover>
                                        <mml:mrow>
                                            <mml:mi>i</mml:mi>
                                            <mml:mo>,</mml:mo>
                                            <mml:mi>t</mml:mi>
                                        </mml:mrow>
                                    </mml:msub>
                                </mml:mfenced>
                                <mml:mo>&gt;</mml:mo>
                                <mml:mi>&#x03ba;</mml:mi>
                            </mml:mrow>
                        </mml:mfenced>
                        <mml:mo>,</mml:mo>
                    </mml:math>
                </disp-formula>using the notation 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:msub>
                            <mml:mover accent="true">
                                <mml:mi>&#x03b5;</mml:mi>
                                <mml:mo stretchy="true">&#x02dc;</mml:mo>
                            </mml:mover>
                            <mml:mrow>
                                <mml:mi>i</mml:mi>
                                <mml:mo>,</mml:mo>
                                <mml:mi>t</mml:mi>
                            </mml:mrow>
                        </mml:msub>
                    </mml:math>
                </inline-formula> for the error value at the 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi>t</mml:mi>
                    </mml:math>
                </inline-formula>-th index. Alternatively, the authors recommend using a 
                <italic toggle="yes">simple moving average</italic> of 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:msub>
                            <mml:mover accent="true">
                                <mml:mi mathvariant="bold-italic">&#x03b5;</mml:mi>
                                <mml:mo stretchy="true">&#x02dc;</mml:mo>
                            </mml:mover>
                            <mml:mi>i</mml:mi>
                        </mml:msub>
                    </mml:math>
                </inline-formula> and flagging time 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi>t</mml:mi>
                    </mml:math>
                </inline-formula> as anomalous if the maximum of that moving average exceeds 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi>&#x03ba;</mml:mi>
                    </mml:math>
                </inline-formula>. The authors specify 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi>&#x03ba;</mml:mi>
                    </mml:math>
                </inline-formula> as the maximum of the normalised errors observed on some (non-anomalous) validation data, denoted by variant epsilon, 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mover accent="true">
                            <mml:mi mathvariant="bold-italic">&#x03b5;</mml:mi>
                            <mml:mo stretchy="true">&#x02dc;</mml:mo>
                        </mml:mover>
                    </mml:math>
                </inline-formula>. However, this is applied to all sensors as a fixed threshold.</p>
            <sec id="sec3">
                <title>Sensor-based anomaly threshold: GDN+</title>
                <p>The behavior of water quality variables may differ across space; for example, water level at high-altitude river network branches is generally characterised by rainfall patterns, whereas water level downstream near a river outlet can also be influenced by tidal patterns. A fixed threshold across the network does not allow for any local adaptations in error sensitivity. For this reason, this work also considers the novel sensor-specific threshold calculation,
                    <disp-formula id="e11">
                        <mml:math display="block">
                            <mml:msub>
                                <mml:mi>A</mml:mi>
                                <mml:mi>i</mml:mi>
                            </mml:msub>
                            <mml:mfenced close=")" open="(">
                                <mml:mi>t</mml:mi>
                            </mml:mfenced>
                            <mml:mo>=</mml:mo>
                            <mml:mi mathvariant="double-struck">I</mml:mi>
                            <mml:mfenced close="}" open="{">
                                <mml:mrow>
                                    <mml:msub>
                                        <mml:mover accent="true">
                                            <mml:mi>&#x03b5;</mml:mi>
                                            <mml:mo stretchy="true">&#x02dc;</mml:mo>
                                        </mml:mover>
                                        <mml:mrow>
                                            <mml:mi>i</mml:mi>
                                            <mml:mo>,</mml:mo>
                                            <mml:mi>t</mml:mi>
                                        </mml:mrow>
                                    </mml:msub>
                                    <mml:mo>&gt;</mml:mo>
                                    <mml:msub>
                                        <mml:mi>&#x03ba;</mml:mi>
                                        <mml:mi>i</mml:mi>
                                    </mml:msub>
                                </mml:mrow>
                            </mml:mfenced>
                            <mml:mo>,</mml:mo>
                        </mml:math>
                    </disp-formula>where 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mi>&#x03ba;</mml:mi>
                                <mml:mi>i</mml:mi>
                            </mml:msub>
                        </mml:math>
                    </inline-formula> is chosen such that,
                    <disp-formula id="e12">
                        <mml:math display="block">
                            <mml:mfrac>
                                <mml:mn>1</mml:mn>
                                <mml:mrow>
                                    <mml:mo>|</mml:mo>
                                    <mml:msub>
                                        <mml:mfenced close="}" open="{">
                                            <mml:msub>
                                                <mml:mover accent="true">
                                                    <mml:mi>&#x03b5;</mml:mi>
                                                    <mml:mo stretchy="true">&#x02dc;</mml:mo>
                                                </mml:mover>
                                                <mml:mrow>
                                                    <mml:mi>j</mml:mi>
                                                    <mml:mo>,</mml:mo>
                                                    <mml:mi>t</mml:mi>
                                                </mml:mrow>
                                            </mml:msub>
                                        </mml:mfenced>
                                        <mml:mrow>
                                            <mml:mi>j</mml:mi>
                                            <mml:mo>:</mml:mo>
                                            <mml:msub>
                                                <mml:mi>A</mml:mi>
                                                <mml:mi mathvariant="italic">ji</mml:mi>
                                            </mml:msub>
                                            <mml:mo>&gt;</mml:mo>
                                            <mml:mn>0</mml:mn>
                                        </mml:mrow>
                                    </mml:msub>
                                    <mml:mo>|</mml:mo>
                                </mml:mrow>
                            </mml:mfrac>
                            <mml:munder>
                                <mml:mo movablelimits="false">&#x2211;</mml:mo>
                                <mml:mrow>
                                    <mml:mi>j</mml:mi>
                                    <mml:mo>:</mml:mo>
                                    <mml:msub>
                                        <mml:mi>A</mml:mi>
                                        <mml:mi mathvariant="italic">ji</mml:mi>
                                    </mml:msub>
                                    <mml:mo>&gt;</mml:mo>
                                    <mml:mn>0</mml:mn>
                                </mml:mrow>
                            </mml:munder>
                            <mml:mi mathvariant="double-struck">I</mml:mi>
                            <mml:mfenced close="}" open="{">
                                <mml:mrow>
                                    <mml:msub>
                                        <mml:mover accent="true">
                                            <mml:mi>&#x03b5;</mml:mi>
                                            <mml:mo stretchy="true">&#x02dc;</mml:mo>
                                        </mml:mover>
                                        <mml:mrow>
                                            <mml:mi>j</mml:mi>
                                            <mml:mo>,</mml:mo>
                                            <mml:mi>t</mml:mi>
                                        </mml:mrow>
                                    </mml:msub>
                                    <mml:mo>&lt;</mml:mo>
                                    <mml:msub>
                                        <mml:mi>&#x03ba;</mml:mi>
                                        <mml:mi>i</mml:mi>
                                    </mml:msub>
                                </mml:mrow>
                            </mml:mfenced>
                            <mml:mo>=</mml:mo>
                            <mml:mi>&#x03c4;</mml:mi>
                            <mml:mo>,</mml:mo>
                        </mml:math>
                    </disp-formula>for some user-specified percentile, 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>&#x03c4;</mml:mi>
                            <mml:mo>&#x2208;</mml:mo>
                            <mml:mfenced close=")" open="(">
                                <mml:mn>0</mml:mn>
                                <mml:mn>100</mml:mn>
                            </mml:mfenced>
                        </mml:math>
                    </inline-formula>, and where 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mo>|</mml:mo>
                            <mml:mo>&#x22c5;</mml:mo>
                            <mml:mo>|</mml:mo>
                        </mml:math>
                    </inline-formula> is the cardinality. In other words, the threshold for each sensor, 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mi>&#x03ba;</mml:mi>
                                <mml:mi>i</mml:mi>
                            </mml:msub>
                        </mml:math>
                    </inline-formula>, is set as the 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>&#x03c4;</mml:mi>
                        </mml:math>
                    </inline-formula>-th percentile of the normalised error scores across the neighbourhood of 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>i</mml:mi>
                        </mml:math>
                    </inline-formula>, on the validation data set. Unless otherwise stated, we set 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>&#x03c4;</mml:mi>
                            <mml:mo>=</mml:mo>
                            <mml:mn>99</mml:mn>
                        </mml:math>
                    </inline-formula>. In this way, the sensor threshold is based only on its direct neighbourhood, as opposed to the original method which uses the global maximum, and is thus in tune with the local behaviour of the system, and more robust as a percentile. We refer to the GDN model using this variant of the threshold-based anomaly detection as GDN+.</p>
            </sec>
            <sec id="sec4">
                <title>New class of benchmarking data</title>
                <p>The following is a method for simulating synthetic datasets with persistent anomalies inspired by the statistical models recently used to model river network data.
                    <sup>
                        <xref ref-type="bibr" rid="ref9">9</xref>
                    </sup> Let 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi mathvariant="script">S</mml:mi>
                        </mml:math>
                    </inline-formula> denote an arbitrary set of individual spatial locations, with locations 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mi mathvariant="bold-italic">s</mml:mi>
                                <mml:mn>1</mml:mn>
                            </mml:msub>
                            <mml:mo>,</mml:mo>
                            <mml:msub>
                                <mml:mi mathvariant="bold-italic">s</mml:mi>
                                <mml:mn>2</mml:mn>
                            </mml:msub>
                            <mml:mo>,</mml:mo>
                            <mml:mo>&#x2026;</mml:mo>
                            <mml:mo>,</mml:mo>
                            <mml:msub>
                                <mml:mi mathvariant="bold-italic">s</mml:mi>
                                <mml:mi>n</mml:mi>
                            </mml:msub>
                            <mml:mo>&#x2208;</mml:mo>
                            <mml:mi mathvariant="script">S</mml:mi>
                        </mml:math>
                    </inline-formula> chosen by experimental design
                    <sup>
                        <xref ref-type="bibr" rid="ref21">21</xref>
                    </sup> or otherwise. Consider a linear mixed model with 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>n</mml:mi>
                            <mml:mo>&#x00d7;</mml:mo>
                            <mml:mn>1</mml:mn>
                        </mml:math>
                    </inline-formula> response 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi mathvariant="bold-italic">Y</mml:mi>
                        </mml:math>
                    </inline-formula>, and 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>n</mml:mi>
                            <mml:mo>&#x00d7;</mml:mo>
                            <mml:mi>m</mml:mi>
                        </mml:math>
                    </inline-formula> design matrix 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi mathvariant="bold-italic">X</mml:mi>
                        </mml:math>
                    </inline-formula> of explanatory variables spatially indexed at locations 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mi mathvariant="bold-italic">s</mml:mi>
                                <mml:mn>1</mml:mn>
                            </mml:msub>
                            <mml:mo>,</mml:mo>
                            <mml:msub>
                                <mml:mi mathvariant="bold-italic">s</mml:mi>
                                <mml:mn>2</mml:mn>
                            </mml:msub>
                            <mml:mo>,</mml:mo>
                            <mml:mo>&#x2026;</mml:mo>
                            <mml:mo>,</mml:mo>
                            <mml:msub>
                                <mml:mi mathvariant="bold-italic">s</mml:mi>
                                <mml:mi>n</mml:mi>
                            </mml:msub>
                        </mml:math>
                    </inline-formula>,
                    <disp-formula id="e13">
                        <mml:math display="block">
                            <mml:msub>
                                <mml:mi mathvariant="bold-italic">Y</mml:mi>
                                <mml:mi>t</mml:mi>
                            </mml:msub>
                            <mml:mo>=</mml:mo>
                            <mml:msub>
                                <mml:mi mathvariant="bold-italic">&#x03b2;</mml:mi>
                                <mml:mn>0</mml:mn>
                            </mml:msub>
                            <mml:mo>+</mml:mo>
                            <mml:msub>
                                <mml:mi mathvariant="bold-italic">X</mml:mi>
                                <mml:mi>t</mml:mi>
                            </mml:msub>
                            <mml:mi mathvariant="bold-italic">&#x03b2;</mml:mi>
                            <mml:mo>+</mml:mo>
                            <mml:mi mathvariant="bold-italic">Z</mml:mi>
                            <mml:mo>+</mml:mo>
                            <mml:msub>
                                <mml:mi mathvariant="bold-italic">&#x03b5;</mml:mi>
                                <mml:mn>0</mml:mn>
                            </mml:msub>
                            <mml:mo>,</mml:mo>
                            <mml:mspace width="1em"/>
                            <mml:mi>t</mml:mi>
                            <mml:mo>=</mml:mo>
                            <mml:mn>1</mml:mn>
                            <mml:mo>,</mml:mo>
                            <mml:mo>&#x2026;</mml:mo>
                            <mml:mo>,</mml:mo>
                            <mml:mi>T</mml:mi>
                            <mml:mo>,</mml:mo>
                        </mml:math>
                        <label>(2)</label>
                    </disp-formula>with time-homogeneous spatially-correlated random effects 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi mathvariant="bold-italic">Z</mml:mi>
                            <mml:mo>&#x223c;</mml:mo>
                            <mml:mi mathvariant="script">N</mml:mi>
                            <mml:mfenced close=")" open="(" separators=",">
                                <mml:mn mathvariant="bold">0</mml:mn>
                                <mml:msub>
                                    <mml:mi mathvariant="bold">&#x03a3;</mml:mi>
                                    <mml:mi mathvariant="bold-italic">Z</mml:mi>
                                </mml:msub>
                            </mml:mfenced>
                        </mml:math>
                    </inline-formula> and vector of independent noise terms 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mi mathvariant="bold-italic">&#x03b5;</mml:mi>
                                <mml:mn>0</mml:mn>
                            </mml:msub>
                            <mml:mo>&#x223c;</mml:mo>
                            <mml:mi mathvariant="script">N</mml:mi>
                            <mml:mfenced close=")" open="(" separators=",">
                                <mml:mn mathvariant="bold">0</mml:mn>
                                <mml:mrow>
                                    <mml:msubsup>
                                        <mml:mi>&#x03c3;</mml:mi>
                                        <mml:mn>0</mml:mn>
                                        <mml:mn>2</mml:mn>
                                    </mml:msubsup>
                                    <mml:mi mathvariant="normal">I</mml:mi>
                                </mml:mrow>
                            </mml:mfenced>
                        </mml:math>
                    </inline-formula>, yielding 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mspace width="0.1em"/>
                            <mml:mi>Cov</mml:mi>
                            <mml:mspace width="0.2em"/>
                            <mml:mfenced close="]" open="[" separators="|">
                                <mml:msub>
                                    <mml:mi mathvariant="bold-italic">Y</mml:mi>
                                    <mml:mi>t</mml:mi>
                                </mml:msub>
                                <mml:mrow>
                                    <mml:msub>
                                        <mml:mi mathvariant="bold-italic">X</mml:mi>
                                        <mml:mi>t</mml:mi>
                                    </mml:msub>
                                    <mml:mo>=</mml:mo>
                                    <mml:msub>
                                        <mml:mi mathvariant="bold-italic">x</mml:mi>
                                        <mml:mi>t</mml:mi>
                                    </mml:msub>
                                </mml:mrow>
                            </mml:mfenced>
                            <mml:mo>=</mml:mo>
                            <mml:msub>
                                <mml:mi mathvariant="bold">&#x03a3;</mml:mi>
                                <mml:mi mathvariant="bold-italic">Z</mml:mi>
                            </mml:msub>
                            <mml:mo>+</mml:mo>
                            <mml:msubsup>
                                <mml:mi>&#x03c3;</mml:mi>
                                <mml:mn>0</mml:mn>
                                <mml:mn>2</mml:mn>
                            </mml:msubsup>
                            <mml:mi mathvariant="normal">I</mml:mi>
                        </mml:math>
                    </inline-formula>, where 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi mathvariant="normal">I</mml:mi>
                        </mml:math>
                    </inline-formula> denotes the 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>n</mml:mi>
                            <mml:mo>&#x00d7;</mml:mo>
                            <mml:mi>n</mml:mi>
                        </mml:math>
                    </inline-formula> identity matrix and 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mi mathvariant="bold-italic">&#x03b5;</mml:mi>
                                <mml:mn>0</mml:mn>
                            </mml:msub>
                        </mml:math>
                    </inline-formula> is an error term. The covariates 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mi mathvariant="bold-italic">X</mml:mi>
                                <mml:mi>t</mml:mi>
                            </mml:msub>
                        </mml:math>
                    </inline-formula> for 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>t</mml:mi>
                            <mml:mo>=</mml:mo>
                            <mml:mn>1</mml:mn>
                            <mml:mo>,</mml:mo>
                            <mml:mo>&#x2026;</mml:mo>
                            <mml:mo>,</mml:mo>
                            <mml:mi>T</mml:mi>
                        </mml:math>
                    </inline-formula> are simulated according to an autoregressive process based on an underlying sequence of independent random fields,
                    <disp-formula id="e14">
                        <mml:math display="block">
                            <mml:msub>
                                <mml:mi mathvariant="bold-italic">X</mml:mi>
                                <mml:mi>t</mml:mi>
                            </mml:msub>
                            <mml:mo>=</mml:mo>
                            <mml:munderover>
                                <mml:mo>&#x2211;</mml:mo>
                                <mml:mrow>
                                    <mml:mi>i</mml:mi>
                                    <mml:mo>=</mml:mo>
                                    <mml:mn>0</mml:mn>
                                </mml:mrow>
                                <mml:mi>p</mml:mi>
                            </mml:munderover>
                            <mml:msub>
                                <mml:mi>&#x03c6;</mml:mi>
                                <mml:mi>i</mml:mi>
                            </mml:msub>
                            <mml:msub>
                                <mml:mover accent="true">
                                    <mml:mi mathvariant="bold-italic">X</mml:mi>
                                    <mml:mo stretchy="true">&#x02dc;</mml:mo>
                                </mml:mover>
                                <mml:mrow>
                                    <mml:mi>t</mml:mi>
                                    <mml:mo>&#x2212;</mml:mo>
                                    <mml:mi>i</mml:mi>
                                </mml:mrow>
                            </mml:msub>
                            <mml:mo>,</mml:mo>
                        </mml:math>
                        <label>(3)</label>
                    </disp-formula>where 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>p</mml:mi>
                        </mml:math>
                    </inline-formula> is the order of the autoregressive process, and
                    <disp-formula id="e15">
                        <mml:math display="block">
                            <mml:msub>
                                <mml:mover accent="true">
                                    <mml:mi mathvariant="bold-italic">X</mml:mi>
                                    <mml:mo stretchy="true">&#x02dc;</mml:mo>
                                </mml:mover>
                                <mml:mi>t</mml:mi>
                            </mml:msub>
                            <mml:mover>
                                <mml:mo>&#x223c;</mml:mo>
                                <mml:mi>iid</mml:mi>
                            </mml:mover>
                            <mml:mi mathvariant="script">N</mml:mi>
                            <mml:mfenced close=")" open="(" separators=",">
                                <mml:mn mathvariant="bold">0</mml:mn>
                                <mml:msub>
                                    <mml:mi mathvariant="bold">&#x03a3;</mml:mi>
                                    <mml:mi mathvariant="bold-italic">X</mml:mi>
                                </mml:msub>
                            </mml:mfenced>
                            <mml:mo>,</mml:mo>
                            <mml:mspace width="1em"/>
                            <mml:mi>t</mml:mi>
                            <mml:mo>=</mml:mo>
                            <mml:mn>1</mml:mn>
                            <mml:mo>,</mml:mo>
                            <mml:mo>&#x2026;</mml:mo>
                            <mml:mo>,</mml:mo>
                            <mml:mi>T</mml:mi>
                            <mml:mo>.</mml:mo>
                        </mml:math>
                    </disp-formula>
                </p>
                <p>Note that other distributions may be used. Above,
                    <disp-formula id="e16">
                        <mml:math display="block">
                            <mml:msub>
                                <mml:mfenced close=")" open="(">
                                    <mml:msub>
                                        <mml:mi mathvariant="bold">&#x03a3;</mml:mi>
                                        <mml:mi mathvariant="bold-italic">X</mml:mi>
                                    </mml:msub>
                                </mml:mfenced>
                                <mml:mi mathvariant="italic">ij</mml:mi>
                            </mml:msub>
                            <mml:mo>=</mml:mo>
                            <mml:mi>k</mml:mi>
                            <mml:mfenced close=")" open="(" separators=",">
                                <mml:msub>
                                    <mml:mi mathvariant="bold-italic">s</mml:mi>
                                    <mml:mi>i</mml:mi>
                                </mml:msub>
                                <mml:msubsup>
                                    <mml:mi mathvariant="bold-italic">s</mml:mi>
                                    <mml:mi>j</mml:mi>
                                    <mml:mo>&#x2032;</mml:mo>
                                </mml:msubsup>
                            </mml:mfenced>
                        </mml:math>
                    </disp-formula>for some covariance kernel 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>k</mml:mi>
                        </mml:math>
                    </inline-formula>. For example,
                    <disp-formula id="e17">
                        <mml:math display="block">
                            <mml:mi>k</mml:mi>
                            <mml:mfenced close=")" open="(" separators=",;,">
                                <mml:mi mathvariant="bold">s</mml:mi>
                                <mml:msup>
                                    <mml:mi mathvariant="bold">s</mml:mi>
                                    <mml:mo>&#x2032;</mml:mo>
                                </mml:msup>
                                <mml:mi>&#x03c3;</mml:mi>
                                <mml:mi>&#x03b1;</mml:mi>
                            </mml:mfenced>
                            <mml:mo>=</mml:mo>
                            <mml:msup>
                                <mml:mi>&#x03c3;</mml:mi>
                                <mml:mn>2</mml:mn>
                            </mml:msup>
                            <mml:mo>exp</mml:mo>
                            <mml:mfenced close=")" open="(">
                                <mml:mrow>
                                    <mml:mo>&#x2212;</mml:mo>
                                    <mml:mfrac>
                                        <mml:msup>
                                            <mml:mfenced close="&#x2016;" open="&#x2016;">
                                                <mml:mrow>
                                                    <mml:mi mathvariant="bold">s</mml:mi>
                                                    <mml:mo>&#x2212;</mml:mo>
                                                    <mml:msup>
                                                        <mml:mi mathvariant="bold">s</mml:mi>
                                                        <mml:mo>&#x2032;</mml:mo>
                                                    </mml:msup>
                                                </mml:mrow>
                                            </mml:mfenced>
                                            <mml:mn>2</mml:mn>
                                        </mml:msup>
                                        <mml:mi>&#x03b1;</mml:mi>
                                    </mml:mfrac>
                                </mml:mrow>
                            </mml:mfenced>
                            <mml:mo>,</mml:mo>
                        </mml:math>
                        <label>(4)</label>
                    </disp-formula>where 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mo stretchy="true">&#x2016;</mml:mo>
                            <mml:mo>&#x22c5;</mml:mo>
                            <mml:mo stretchy="true">&#x2016;</mml:mo>
                        </mml:math>
                    </inline-formula> denotes the Euclidean norm, 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msup>
                                <mml:mi>&#x03c3;</mml:mi>
                                <mml:mn>2</mml:mn>
                            </mml:msup>
                            <mml:mo>&gt;</mml:mo>
                            <mml:mn>0</mml:mn>
                        </mml:math>
                    </inline-formula> is the covariance-scaling parameter, and 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>&#x03b1;</mml:mi>
                            <mml:mo>&#x2208;</mml:mo>
                            <mml:mi mathvariant="normal">&#x211d;</mml:mi>
                        </mml:math>
                    </inline-formula> is the range parameter that controls the rate of decay of correlation between points over distance. 
                    <xref ref-type="fig" rid="f1">Figure 1</xref> illustrates an example of a generated Gaussian random field evolving over time.</p>
                <fig fig-type="figure" id="f1" orientation="portrait" position="float">
                    <label>Figure 1. </label>
                    <caption>
                        <title>Smooth random Gaussian field used to generate covariate values, 
                            <inline-formula>
                                <mml:math display="inline">
                                    <mml:mi mathvariant="bold-italic">X</mml:mi>
                                </mml:math>
                            </inline-formula>, in the simulation studies.</title>
                        <p>Examples of the field are shown for 
                            <inline-formula>
                                <mml:math display="inline">
                                    <mml:mi>t</mml:mi>
                                    <mml:mo>=</mml:mo>
                                    <mml:mn>1</mml:mn>
                                    <mml:mo>,</mml:mo>
                                    <mml:mn>2</mml:mn>
                                    <mml:mo>,</mml:mo>
                                    <mml:mn>3</mml:mn>
                                </mml:math>
                            </inline-formula>. Sensor locations are shown as white dots.</p>
                    </caption>
                    <graphic id="gr1" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/149233/d8a1db05-aa51-4579-95b0-af83fdacf520_figure1.gif"/>
                </fig>
                <p>We consider two scenarios in generating simulated data: 1) spatial relationships characterised by Euclidean distance only, where the covariance matrix, 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mi mathvariant="bold">&#x03a3;</mml:mi>
                                <mml:mi mathvariant="bold-italic">Z</mml:mi>
                            </mml:msub>
                        </mml:math>
                    </inline-formula>, is constructed via the kernel function given in 
                    <xref ref-type="disp-formula" rid="e17">Equation 4</xref>, and 
                    2) data simulated on a river network, where 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mi mathvariant="bold">&#x03a3;</mml:mi>
                                <mml:mi mathvariant="bold-italic">Z</mml:mi>
                            </mml:msub>
                        </mml:math>
                    </inline-formula> is constructed via the kernel function given in 
                    <xref ref-type="disp-formula" rid="e18">Equation 5</xref>. Together, these scenarios provide a variety of simulated data with highly sophisticated dependency structures (see 
                    <xref ref-type="fig" rid="f2">Figure 2</xref>).</p>
                <fig fig-type="figure" id="f2" orientation="portrait" position="float">
                    <label>Figure 2. </label>
                    <caption>
                        <title>
                            <italic toggle="yes">SimEuc</italic> site locations across space (left) and 
                            <italic toggle="yes">SimRiver</italic> site locations along a river network (right).</title>
                        <p>Both simulations use the same (x, y) coordinates for sensor locations. The direction of flow is from top to bottom for SimRiver. Red dots indicate sites for which time series are shown in 
                            <xref ref-type="fig" rid="f3">Figure 3</xref> and 
                            <xref ref-type="fig" rid="f4">Figure 4</xref>.</p>
                    </caption>
                    <graphic id="gr2" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/149233/d8a1db05-aa51-4579-95b0-af83fdacf520_figure2.gif"/>
                </fig>
                <p>Two points 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mi mathvariant="bold-italic">s</mml:mi>
                                <mml:mi>i</mml:mi>
                            </mml:msub>
                        </mml:math>
                    </inline-formula>, 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mi mathvariant="bold-italic">s</mml:mi>
                                <mml:mi>j</mml:mi>
                            </mml:msub>
                        </mml:math>
                    </inline-formula> on a river network are said to be 
                    <italic toggle="yes">flow-connected</italic> if they share water flow, and 
                    <italic toggle="yes">flow-unconnected</italic> otherwise. We define stream distance, 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mi>h</mml:mi>
                                <mml:mi mathvariant="italic">Riv</mml:mi>
                            </mml:msub>
                            <mml:mfenced close=")" open="(" separators=",">
                                <mml:msub>
                                    <mml:mi mathvariant="bold-italic">s</mml:mi>
                                    <mml:mi>i</mml:mi>
                                </mml:msub>
                                <mml:msub>
                                    <mml:mi mathvariant="bold-italic">s</mml:mi>
                                    <mml:mi>j</mml:mi>
                                </mml:msub>
                            </mml:mfenced>
                        </mml:math>
                    </inline-formula>, as the shortest distance separating 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mi mathvariant="bold-italic">s</mml:mi>
                                <mml:mi>i</mml:mi>
                            </mml:msub>
                        </mml:math>
                    </inline-formula> and 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mi mathvariant="bold-italic">s</mml:mi>
                                <mml:mi>j</mml:mi>
                            </mml:msub>
                        </mml:math>
                    </inline-formula> when travelling 
                    <italic toggle="yes">along</italic> a given river network. Tail-up covariance models for river networks, introduced in Ref. 
                    <xref ref-type="bibr" rid="ref8">8</xref>, effectively represent spatial relationships when variables are dominated by flow (e.g. when pollutants enter a stream and only impact downstream locations). By construction, the tail-up covariance function only allows for correlation between flow-connected sites:
                    <disp-formula id="e18">
                        <mml:math display="block">
                            <mml:msub>
                                <mml:mi>k</mml:mi>
                                <mml:mi>TU</mml:mi>
                            </mml:msub>
                            <mml:mfenced close=")" open="(" separators=",;,">
                                <mml:msub>
                                    <mml:mi mathvariant="bold-italic">s</mml:mi>
                                    <mml:mi>i</mml:mi>
                                </mml:msub>
                                <mml:msub>
                                    <mml:mi mathvariant="bold-italic">s</mml:mi>
                                    <mml:mi>j</mml:mi>
                                </mml:msub>
                                <mml:mi>&#x03c3;</mml:mi>
                                <mml:mi>&#x03b1;</mml:mi>
                            </mml:mfenced>
                            <mml:mo>=</mml:mo>
                            <mml:msub>
                                <mml:mi>&#x03c9;</mml:mi>
                                <mml:mi mathvariant="italic">ij</mml:mi>
                            </mml:msub>
                            <mml:msup>
                                <mml:mi>&#x03c3;</mml:mi>
                                <mml:mn>2</mml:mn>
                            </mml:msup>
                            <mml:mo>exp</mml:mo>
                            <mml:mfenced close=")" open="(">
                                <mml:mrow>
                                    <mml:mo>&#x2212;</mml:mo>
                                    <mml:mfrac>
                                        <mml:mrow>
                                            <mml:msub>
                                                <mml:mi>h</mml:mi>
                                                <mml:mi mathvariant="italic">Riv</mml:mi>
                                            </mml:msub>
                                            <mml:mfenced close=")" open="(" separators=",">
                                                <mml:msub>
                                                    <mml:mi mathvariant="bold-italic">s</mml:mi>
                                                    <mml:mi>i</mml:mi>
                                                </mml:msub>
                                                <mml:msub>
                                                    <mml:mi mathvariant="bold-italic">s</mml:mi>
                                                    <mml:mi>j</mml:mi>
                                                </mml:msub>
                                            </mml:mfenced>
                                        </mml:mrow>
                                        <mml:mi>&#x03b1;</mml:mi>
                                    </mml:mfrac>
                                </mml:mrow>
                            </mml:mfenced>
                            <mml:msub>
                                <mml:mi mathvariant="script">F</mml:mi>
                                <mml:mi mathvariant="italic">ij</mml:mi>
                            </mml:msub>
                            <mml:mo>,</mml:mo>
                        </mml:math>
                        <label>(5)</label>
                    </disp-formula>where 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mi mathvariant="script">F</mml:mi>
                                <mml:mi mathvariant="italic">ij</mml:mi>
                            </mml:msub>
                        </mml:math>
                    </inline-formula> is equal to one if 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mi mathvariant="bold-italic">s</mml:mi>
                                <mml:mi>i</mml:mi>
                            </mml:msub>
                        </mml:math>
                    </inline-formula> and 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mi mathvariant="bold-italic">s</mml:mi>
                                <mml:mi>j</mml:mi>
                            </mml:msub>
                        </mml:math>
                    </inline-formula> are flow-connected, and zero otherwise, and 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mi>&#x03c9;</mml:mi>
                                <mml:mi mathvariant="italic">ij</mml:mi>
                            </mml:msub>
                        </mml:math>
                    </inline-formula> is a weighting attributed to each stream segment to account for the upstream branching network structure and ensure stationarity in variance (for full details, see Refs. 
                    <xref ref-type="bibr" rid="ref22">22</xref>, 
                    <xref ref-type="bibr" rid="ref23">23</xref>). The weightings corresponding to each segment may incorporate flow volume, or the area of the catchment, or a proxy such as stream order.
                    <sup>
                        <xref ref-type="bibr" rid="ref24">24</xref>
                    </sup> Note that there are various choices of covariance models. Tail-down models allow correlation between both flow-connected and flow-unconnected locations, and may be more suitable for water variables such as temperature, or organisms that can move both upstream and downstream.
                    <sup>
                        <xref ref-type="bibr" rid="ref25">25</xref>
                    </sup> Here we use the exponential function for decay; for further covariance model examples, see Ref. 
                    <xref ref-type="bibr" rid="ref8">8</xref>.</p>
                <p>Once the base multivariate time series is constructed as above, it is modified to include persistent anomalies as follows. The hyperparameters are 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mi>n</mml:mi>
                                <mml:mtext>anomaly</mml:mtext>
                            </mml:msub>
                            <mml:mo>&#x2265;</mml:mo>
                            <mml:mn>0</mml:mn>
                        </mml:math>
                    </inline-formula>, the number of subsequence anomalies, and 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mi>&#x03bb;</mml:mi>
                                <mml:mtext>anomaly</mml:mtext>
                            </mml:msub>
                            <mml:mo>&gt;</mml:mo>
                            <mml:mn>0</mml:mn>
                        </mml:math>
                    </inline-formula>, the average length of an anomalous subsequence, for each anomaly type. In this example, we consider two types of anomalies, high-variability and drift, see 
                    <xref ref-type="boxed-text" rid="B1">Algorithm 1</xref>.</p>
                <boxed-text id="B1" orientation="portrait" position="float">
                    <label>Algorithm 1. </label>
                    <caption>
                        <title>Two-type Anomaly Generation.</title>
                    </caption>
                    <p>
                        <bold>input :</bold> Time series data 
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:mi mathvariant="normal">Y</mml:mi>
                                <mml:mo>=</mml:mo>
                                <mml:mfenced close="]" open="[" separators=",,">
                                    <mml:msup>
                                        <mml:mi mathvariant="bold-italic">y</mml:mi>
                                        <mml:mfenced close=")" open="(">
                                            <mml:mn>1</mml:mn>
                                        </mml:mfenced>
                                    </mml:msup>
                                    <mml:mo>&#x2026;</mml:mo>
                                    <mml:msup>
                                        <mml:mi mathvariant="bold-italic">y</mml:mi>
                                        <mml:mfenced close=")" open="(">
                                            <mml:mi>T</mml:mi>
                                        </mml:mfenced>
                                    </mml:msup>
                                </mml:mfenced>
                            </mml:math>
                        </inline-formula>, number of locations 
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:mi>n</mml:mi>
                            </mml:math>
                        </inline-formula>, expected length of each anomaly type 
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:msub>
                                    <mml:mi>&#x03bb;</mml:mi>
                                    <mml:mtext>drift</mml:mtext>
                                </mml:msub>
                            </mml:math>
                        </inline-formula> and 
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:msub>
                                    <mml:mi>&#x03bb;</mml:mi>
                                    <mml:mi>var</mml:mi>
                                </mml:msub>
                            </mml:math>
                        </inline-formula>, number of anomalies, 
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:msub>
                                    <mml:mi>n</mml:mi>
                                    <mml:mtext>drift</mml:mtext>
                                </mml:msub>
                            </mml:math>
                        </inline-formula> and 
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:msub>
                                    <mml:mi>n</mml:mi>
                                    <mml:mi>var</mml:mi>
                                </mml:msub>
                            </mml:math>
                        </inline-formula>, variability anomaly scale 
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:mi>&#x03b6;</mml:mi>
                            </mml:math>
                        </inline-formula>, and drift anomaly parameter 
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:mi>&#x03b4;</mml:mi>
                            </mml:math>
                        </inline-formula>.</p>
                    <p>
                        <bold>for</bold> 
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:mtext>anomaly</mml:mtext>
                                <mml:mo>&#x2208;</mml:mo>
                                <mml:mfenced close="}" open="{" separators=",">
                                    <mml:mtext>drift</mml:mtext>
                                    <mml:mtext>var</mml:mtext>
                                </mml:mfenced>
                            </mml:math>
                        </inline-formula> 
                        <bold>do</bold>
                    </p>
                    <p>&#x2003;&#x2003;&#x2002;
                        <bold>for</bold> 
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:mi>i</mml:mi>
                                <mml:mo>=</mml:mo>
                                <mml:mn>1</mml:mn>
                                <mml:mo>,</mml:mo>
                                <mml:mo>&#x2026;</mml:mo>
                                <mml:mo>,</mml:mo>
                                <mml:msub>
                                    <mml:mi>n</mml:mi>
                                    <mml:mtext>anomaly</mml:mtext>
                                </mml:msub>
                            </mml:math>
                        </inline-formula> 
                        <bold>do</bold>
                    </p>
                    <p>&#x2003;&#x2003;&#x2003;&#x2003;Draw location 
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:mi>S</mml:mi>
                                <mml:mo>&#x223c;</mml:mo>
                                <mml:mtext>Uniform</mml:mtext>
                                <mml:mfenced close=")" open="(">
                                    <mml:mfenced close="}" open="{" separators=",,,">
                                        <mml:mn>1</mml:mn>
                                        <mml:mn>2</mml:mn>
                                        <mml:mo>&#x2026;</mml:mo>
                                        <mml:mi>n</mml:mi>
                                    </mml:mfenced>
                                </mml:mfenced>
                            </mml:math>
                        </inline-formula>
                    </p>
                    <p>&#x2003;&#x2003;&#x2003;&#x2003;Draw time 
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:mi>t</mml:mi>
                                <mml:mo>&#x223c;</mml:mo>
                                <mml:mtext>Uniform</mml:mtext>
                                <mml:mfenced close=")" open="(">
                                    <mml:mfenced close="}" open="{" separators=",,">
                                        <mml:mn>1</mml:mn>
                                        <mml:mo>&#x2026;</mml:mo>
                                        <mml:mi>T</mml:mi>
                                    </mml:mfenced>
                                </mml:mfenced>
                            </mml:math>
                        </inline-formula>
                    </p>
                    <p>&#x2003;&#x2003;&#x2003;&#x2003;Draw length 
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:mi>L</mml:mi>
                                <mml:mo>&#x223c;</mml:mo>
                                <mml:mtext>Poisson</mml:mtext>
                                <mml:mfenced close=")" open="(">
                                    <mml:msub>
                                        <mml:mi>&#x03bb;</mml:mi>
                                        <mml:mtext>anomaly</mml:mtext>
                                    </mml:msub>
                                </mml:mfenced>
                            </mml:math>
                        </inline-formula>
                    </p>
                    <p>&#x2003;&#x2003;&#x2003;&#x2003;
                        <bold>if</bold> 
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:mtext>anomaly</mml:mtext>
                                <mml:mo>=</mml:mo>
                                <mml:mtext>drift</mml:mtext>
                            </mml:math>
                        </inline-formula> 
                        <bold>then</bold>
                    </p>
                    <p>&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:mi mathvariant="bold-italic">v</mml:mi>
                                <mml:mo>&#x2190;</mml:mo>
                                <mml:mfenced close=")" open="(" separators=",,,">
                                    <mml:mi>&#x03b4;</mml:mi>
                                    <mml:mrow>
                                        <mml:mn>2</mml:mn>
                                        <mml:mi>&#x03b4;</mml:mi>
                                    </mml:mrow>
                                    <mml:mo>&#x2026;</mml:mo>
                                    <mml:mi mathvariant="italic">L&#x03b4;</mml:mi>
                                </mml:mfenced>
                            </mml:math>
                        </inline-formula>&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;
                        <monospace>// drift</monospace>
                    </p>
                    <p>&#x2003;&#x2003;&#x2003;&#x2003;
                        <bold>else</bold>
                    </p>
                    <p>&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:mi mathvariant="bold-italic">v</mml:mi>
                                <mml:mo>&#x223c;</mml:mo>
                                <mml:mi mathvariant="script">N</mml:mi>
                                <mml:mfenced close=")" open="(" separators=",">
                                    <mml:mn mathvariant="bold">0</mml:mn>
                                    <mml:mrow>
                                        <mml:msup>
                                            <mml:mi>&#x03b6;</mml:mi>
                                            <mml:mn>2</mml:mn>
                                        </mml:msup>
                                        <mml:msub>
                                            <mml:mi mathvariant="normal">I</mml:mi>
                                            <mml:mrow>
                                                <mml:mi>L</mml:mi>
                                                <mml:mo>&#x00d7;</mml:mo>
                                                <mml:mi>L</mml:mi>
                                            </mml:mrow>
                                        </mml:msub>
                                    </mml:mrow>
                                </mml:mfenced>
                            </mml:math>
                        </inline-formula>&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;
                        <monospace>// variability</monospace>
                    </p>
                    <p>&#x2003;&#x2003;&#x2003;&#x2003;
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:msub>
                                    <mml:mi mathvariant="bold-italic">y</mml:mi>
                                    <mml:mrow>
                                        <mml:mi>S</mml:mi>
                                        <mml:mo>,</mml:mo>
                                        <mml:mi>t</mml:mi>
                                        <mml:mo>:</mml:mo>
                                        <mml:mfenced close=")" open="(">
                                            <mml:mrow>
                                                <mml:mi>t</mml:mi>
                                                <mml:mo>+</mml:mo>
                                                <mml:mi>L</mml:mi>
                                            </mml:mrow>
                                        </mml:mfenced>
                                    </mml:mrow>
                                </mml:msub>
                                <mml:mo>&#x2190;</mml:mo>
                                <mml:msub>
                                    <mml:mi mathvariant="bold-italic">y</mml:mi>
                                    <mml:mrow>
                                        <mml:mi>S</mml:mi>
                                        <mml:mo>,</mml:mo>
                                        <mml:mi>t</mml:mi>
                                        <mml:mo>:</mml:mo>
                                        <mml:mfenced close=")" open="(">
                                            <mml:mrow>
                                                <mml:mi>t</mml:mi>
                                                <mml:mo>+</mml:mo>
                                                <mml:mi>L</mml:mi>
                                            </mml:mrow>
                                        </mml:mfenced>
                                    </mml:mrow>
                                </mml:msub>
                                <mml:mo>+</mml:mo>
                                <mml:mi mathvariant="bold-italic">v</mml:mi>
                            </mml:math>
                        </inline-formula>
                    </p>
                </boxed-text>
            </sec>
            <sec id="sec5">
                <title>Python Package: Graph-Based Neural Network Anomaly Detection (gnnad)</title>
                <p>The Python package gnnad introduced in this paper extends and generalises the research code originally implemented by Ref. 
                    <xref ref-type="bibr" rid="ref15">15</xref>, which is incompatible with newer package dependencies and offers only a command-line interface. The code is refactored to be modular and user-friendly, with a scikit-learn-inspired interface, and extended to include visualisation, data and anomaly generation modules, as well as the GDN+ model extension. A continuous integration/continuous deployment (CI/CD) pipeline is established with unit testing to ensure that changes to the code are tested and deployed efficiently. Comprehensive documentation now accompanying the codebase enhances readability for future developers, facilitating maintenance, reuse, and modification. Furthermore, rigorous error handling is implemented to improve the software experience. The software developments have resulted in a more robust, user-friendly and easily distributable package that is available via 
                    <ext-link ext-link-type="uri" xlink:href="https://github.com/KatieBuc/gnnad">
                        <monospace>https://github.com/KatieBuc/gnnad</monospace>
                    </ext-link> and on the Python Package Index (PyPI) as gnnad. See below for example code that shows the process of fitting the GDN+ model.</p>
                <p>from sklearn.model_selection import train_test_split</p>
                <p>from gnnad.graphanomaly import GNNAD</p>
                <p>from gnnad.generate import GenerateAnomaly</p>
                <p># split train test for input data frame, X</p>
                <p>X_train, X_test = train_test_split(X, shuffle=False)</p>
                <p># generate anomalies on test set</p>
                <p>anoms = GenerateAnomaly(X_test)</p>
                <p>X_test = anoms.generate(anoms.variability, lam=3, prop_anom=0.07, seed=45)</p>
                <p>X_test = anoms.generate(anoms.drift, lam=11, prop_anom=0.07, seed=234)</p>
                <p>y_test = anoms.get_labels()</p>
                <p># instantiate and fit GDN model object</p>
                <p>model = GNNAD(threshold_type="max_validation", topk=6, slide_win=200)</p>
                <p>fitted_model = model.fit(X_train, X_test, y_test)</p>
                <p># sensor based threshold based on GDN+</p>
                <p>pred_label = fitted_model.sensor_threshold_preds(tau=99)</p>
                <p># print evaluation metrics</p>
                <p>fitted_model.print_eval_metrics(pred_label)</p>
            </sec>
        </sec>
        <sec id="sec6" sec-type="results">
            <title>Results</title>
            <p>This section presents a summary of the main findings for anomaly detection using GDN/GDN+ on both simulated and real-world data. To ensure quality of data, the aim for practitioners is to maximise the ability to identify anomalies of different types, while minimising false detection rates. We define the following metrics in terms of true positive (TP), true negative (TN), false positive (FP) and false negative (FN) classifications. In these terms, the main priority is to minimise FN, while maintaining a reasonable number of FP, such that it is not an operational burden to check the total number of positive flags. Accordingly, we use recall, defined by 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mfrac>
                            <mml:mi>TP</mml:mi>
                            <mml:mrow>
                                <mml:mi>TP</mml:mi>
                                <mml:mo>+</mml:mo>
                                <mml:mi>FN</mml:mi>
                            </mml:mrow>
                        </mml:mfrac>
                    </mml:math>
                </inline-formula>, to evaluate the performance on the test set and to select hyperparameters. Recall is the proportion of actual positive cases that were correctly identified by the model(s). We also report the performance using precision 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mfenced close=")" open="(">
                            <mml:mfrac>
                                <mml:mi>TP</mml:mi>
                                <mml:mrow>
                                    <mml:mi>TP</mml:mi>
                                    <mml:mo>+</mml:mo>
                                    <mml:mi>FP</mml:mi>
                                </mml:mrow>
                            </mml:mfrac>
                        </mml:mfenced>
                    </mml:math>
                </inline-formula>, accuracy 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mfenced close=")" open="(">
                            <mml:mfrac>
                                <mml:mrow>
                                    <mml:mi>TP</mml:mi>
                                    <mml:mo>+</mml:mo>
                                    <mml:mi>TN</mml:mi>
                                </mml:mrow>
                                <mml:mrow>
                                    <mml:mi>TP</mml:mi>
                                    <mml:mo>+</mml:mo>
                                    <mml:mi>TN</mml:mi>
                                    <mml:mo>+</mml:mo>
                                    <mml:mi>FP</mml:mi>
                                    <mml:mo>+</mml:mo>
                                    <mml:mi>FN</mml:mi>
                                </mml:mrow>
                            </mml:mfrac>
                        </mml:mfenced>
                    </mml:math>
                </inline-formula> and specificity 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mfenced close=")" open="(">
                            <mml:mfrac>
                                <mml:mi>TN</mml:mi>
                                <mml:mrow>
                                    <mml:mi>TN</mml:mi>
                                    <mml:mo>+</mml:mo>
                                    <mml:mi>FP</mml:mi>
                                </mml:mrow>
                            </mml:mfrac>
                        </mml:mfenced>
                    </mml:math>
                </inline-formula>.</p>
            <p>To evaluate model performance, three existing anomaly detection models are used as benchmarks: 1. The naive (random walk) Autoregressive Integrated Moving Average Model (ARIMA) prediction model from Refs. 
                <xref ref-type="bibr" rid="ref7">7</xref>, 
                <xref ref-type="bibr" rid="ref26">26</xref>; 2. HDoutliers,
                <sup>
                    <xref ref-type="bibr" rid="ref17">17</xref>
                </sup> an unsupervised algorithm designed to identify anomalies in high-dimensional data, based on a distributional model that allows for probability assignment to an anomaly; and 3. DeepAnT,
                <sup>
                    <xref ref-type="bibr" rid="ref14">14</xref>
                </sup> an unsupervised, deep learning-based approach to detecting anomalies in time series data.</p>
            <sec id="sec7">
                <title>Simulation study: Benchmark data</title>
                <p>Data are generated using the linear-mixed model described in 
                    <xref ref-type="disp-formula" rid="e13">Equation 2</xref>, with differing spatial dynamics: 
                    <italic toggle="yes">SimEuc</italic> where the random effect, 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi mathvariant="bold-italic">Z</mml:mi>
                        </mml:math>
                    </inline-formula>, is characterised by Euclidean distance only, and 
                    <italic toggle="yes">SimRiver</italic> where 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi mathvariant="bold-italic">Z</mml:mi>
                        </mml:math>
                    </inline-formula> simulates complex river network dynamics,
                    <sup>
                        <xref ref-type="bibr" rid="ref27">27</xref>
                    </sup> using the same site locations and covariate values, 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi mathvariant="bold-italic">X</mml:mi>
                        </mml:math>
                    </inline-formula>. Detecting anomalies that involve multiple consecutive observations is a difficult task that often requires user intervention, and is the focus of this study. We consider scenarios with drift and high-variability anomaly types, which together contaminate 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mn>13.2</mml:mn>
                            <mml:mo>%</mml:mo>
                        </mml:math>
                    </inline-formula> of the test data, given 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mi>n</mml:mi>
                                <mml:mtext>drift</mml:mtext>
                            </mml:msub>
                            <mml:mo>=</mml:mo>
                            <mml:mn>5</mml:mn>
                            <mml:mo>,</mml:mo>
                            <mml:msub>
                                <mml:mi>&#x03bb;</mml:mi>
                                <mml:mtext>drift</mml:mtext>
                            </mml:msub>
                            <mml:mo>=</mml:mo>
                            <mml:mn>11</mml:mn>
                            <mml:mo>,</mml:mo>
                            <mml:msub>
                                <mml:mi>n</mml:mi>
                                <mml:mi>var</mml:mi>
                            </mml:msub>
                            <mml:mo>=</mml:mo>
                            <mml:mn>24</mml:mn>
                            <mml:mo>,</mml:mo>
                            <mml:msub>
                                <mml:mi>&#x03bb;</mml:mi>
                                <mml:mi>var</mml:mi>
                            </mml:msub>
                            <mml:mo>=</mml:mo>
                            <mml:mn>3</mml:mn>
                        </mml:math>
                    </inline-formula>, see 
                    <xref ref-type="table" rid="T1">Table 1</xref>.</p>
                <table-wrap id="T1" orientation="portrait" position="float">
                    <label>Table 1. </label>
                    <caption>
                        <title>Details of the three data sets used in the case studies.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">Dataset</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">#Train</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">#Test</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">%Anomalies</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <italic toggle="yes">SimEuc</italic>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <inline-formula>
                                        <mml:math display="inline">
                                            <mml:mn>3,000</mml:mn>
                                        </mml:math>
                                    </inline-formula>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <inline-formula>
                                        <mml:math display="inline">
                                            <mml:mn>1,000</mml:mn>
                                        </mml:math>
                                    </inline-formula>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <inline-formula>
                                        <mml:math display="inline">
                                            <mml:mn>13.2</mml:mn>
                                        </mml:math>
                                    </inline-formula>
                                </td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <italic toggle="yes">SimRiver</italic>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <inline-formula>
                                        <mml:math display="inline">
                                            <mml:mn>3,000</mml:mn>
                                        </mml:math>
                                    </inline-formula>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <inline-formula>
                                        <mml:math display="inline">
                                            <mml:mn>1,000</mml:mn>
                                        </mml:math>
                                    </inline-formula>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <inline-formula>
                                        <mml:math display="inline">
                                            <mml:mn>13.2</mml:mn>
                                        </mml:math>
                                    </inline-formula>
                                </td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Herbert</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <inline-formula>
                                        <mml:math display="inline">
                                            <mml:mn>12,745</mml:mn>
                                        </mml:math>
                                    </inline-formula>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <inline-formula>
                                        <mml:math display="inline">
                                            <mml:mn>3,499</mml:mn>
                                        </mml:math>
                                    </inline-formula>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>58.0</bold>
                                </td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
                <p>
                    <xref ref-type="fig" rid="f3">Figure 3</xref> visualises aspects of the 
                    <italic toggle="yes">SimEuc</italic> dataset, where sensor 37 and sensor 8 are in close (Euclidean) proximity, resulting in a high correlation between the locations (0.65), as anticipated. Note that sensor 23 exhibits anomalous behaviour, high-variability and drift, consecutively, over time. In contrast, in the 
                    <italic toggle="yes">SimRiver</italic> dataset, shown in 
                    <xref ref-type="fig" rid="f4">Figure 4</xref>, we note how the time series from sensor 37 and sensor 8 are no longer strongly correlated (0.07), despite their close proximity, as they are not flow-connected in the simulated river network.</p>
                <fig fig-type="figure" id="f3" orientation="portrait" position="float">
                    <label>Figure 3. </label>
                    <caption>
                        <title>A selection of time series from the 
                            <italic toggle="yes">SimEuc</italic> data set; sensor 8 and sensor 37, separated by a short Euclidean distance, share high correlation (Pearson&#x2019;s coefficient of 0.65).</title>
                        <p>A Savitzky-Golay filter smoothens the time series (purple line). On the bottom, sensor 23 illustrates high-variability and drift anomalies, consecutively (red dots).</p>
                    </caption>
                    <graphic id="gr3" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/149233/d8a1db05-aa51-4579-95b0-af83fdacf520_figure3.gif"/>
                </fig>
                <fig fig-type="figure" id="f4" orientation="portrait" position="float">
                    <label>Figure 4. </label>
                    <caption>
                        <title>A selection of time series from the 
                            <italic toggle="yes">SimRiver</italic> data set; sensor 8 and sensor 37 are not flow-connected sites, and share low correlation (Pearson&#x2019;s coefficient of 0.07).</title>
                        <p>Drift anomalies (red dots) are shown in data from sensor 4. A Savitzky-Golay filter is used to smoothen the time series for visual representation (purple line).</p>
                    </caption>
                    <graphic id="gr4" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/149233/d8a1db05-aa51-4579-95b0-af83fdacf520_figure4.gif"/>
                </fig>
                <p>
                    <xref ref-type="table" rid="T2">Table 2</xref> shows the performance of the different anomaly detection methods. The best performance in terms of recall is highlighted in bold, while the second-best performance is underlined. We observe that GDN/GDN+ outperforms most other models in terms of recall, which is the fraction of true positives among all actual positive instances. Specifically, GDN has a recall score of 83.3% on 
                    <italic toggle="yes">SimEuc</italic> and 72.7% on <italic toggle="yes">SimRiver</italic>, while GDN+ has the second-highest recall score of 85.6% on 
                    <italic toggle="yes">SimEuc</italic> and 78.0% on <italic toggle="yes">SimRiver</italic>. Although ARIMA performed best in terms of recall, the high percentage of detected anomalies, 81.6% and 85.6%, is impractical to use (discussed below) and results in low accuracy scores of 28.6% and 25.3%, respectively. HDoutliers did not flag any time point as anomalous in any test case. DeepAnT tends to classify most samples as negative, resulting in a low recall score. These results suggest that GDN/GDN+ are best able to detect a high proportion of actual anomalies in the datasets.</p>
                <table-wrap id="T2" orientation="portrait" position="float">
                    <label>Table 2. </label>
                    <caption>
                        <title>Anomaly detection performance in terms of recall (%), precision (%), accuracy (%), and specificity (%) of GDN and its variants and baseline methods for the simulation study.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">Data</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Model</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Rec</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Prec</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Acc</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Spec</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="5" valign="top">SimEuc</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">HDoutliers</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>0.0</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>0.0</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="top">86.8</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">100.0</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">ARIMA</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>88.6</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="top">14.4</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">28.6</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">19.4</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">DeepAnT</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">3.8</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">11.9</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">83.5</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">95.7</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">GDN</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">83.3</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">55.3</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">88.9</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">89.7</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">GDN+</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <underline>85.6</underline>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="top">48.1</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">85.9</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">85.9</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="5" valign="top">SimRiv</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">HDoutliers</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>0.0</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>0.0</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="top">86.8</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">100.0</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">ARIMA</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>91.7</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="top">14.2</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>25.3</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>15.1</bold>
                                </td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">DeepAnT</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>0.8</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>9.1</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="top">85.9</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">98.8</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">GDN</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">72.7</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">54.2</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">88.3</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">90.6</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">GDN+</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <underline>78.0</underline>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="top">43.1</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">83.5</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">84.3</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
                <p>
                    <xref ref-type="fig" rid="f5">Figure 5</xref> shows the classifications of anomalies in the simulation study. For reasons mentioned, recall is the performance metric of interest, but we also consider the trade-off between recall and precision. Lower precision means that the model may also identify some normal instances as anomalies, leading to false positives. In the context of river network anomaly detection, FP may be manually filtered, but it is critical to minimise FN. Note that GDN+ outperforms GDN in minimising the FN count, but at the cost of increasing FP, in both data sets. Such a trade-off is acceptable and considered an improvement in this context. Conversely, while ARIMA has the highest recall score, the number of FP classifications is impractical for practitioners to deal with (&gt;70% of the test data). We also note that drift anomalies are harder to detect than high-variability anomalies, with drift as the majority of FN counts, in all cases.</p>
                <fig fig-type="figure" id="f5" orientation="portrait" position="float">
                    <label>Figure 5. </label>
                    <caption>
                        <title>Anomaly detection performance of ARIMA, GDN, and GDN+ for the simulation study, in terms of true positive (TP), true negative (TN), false positive (FP) and false negative (FN).</title>
                    </caption>
                    <graphic id="gr5" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/149233/d8a1db05-aa51-4579-95b0-af83fdacf520_figure5.gif"/>
                </fig>
                <p>The authors of the GDN model demonstrated its efficacy in detecting anywhere-within-system failures at time 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>t</mml:mi>
                        </mml:math>
                    </inline-formula> by applying a threshold to all sensors within a system. However, the use of sensor-based thresholds in GDN+ has the advantage of indicating anomalies at the individual sensor level. In the context of monitoring river networks, it is crucial to identify the anomalous sensor, 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>i</mml:mi>
                        </mml:math>
                    </inline-formula>, at a given time 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>t</mml:mi>
                        </mml:math>
                    </inline-formula>. The percentage of true positives detected at the correct sensor, 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>i</mml:mi>
                        </mml:math>
                    </inline-formula>, using the sensor-based anomaly threshold, 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mi>A</mml:mi>
                                <mml:mi>i</mml:mi>
                            </mml:msub>
                            <mml:mfenced close=")" open="(">
                                <mml:mi>t</mml:mi>
                            </mml:mfenced>
                        </mml:math>
                    </inline-formula>, in GDN+, was 92% and 89% for 
                    <italic toggle="yes">SimEuc</italic> and <italic toggle="yes">SimRiver</italic>, respectively. Similarly, the rate of true positives detected in the neighbourhood of the correct sensor 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>i</mml:mi>
                        </mml:math>
                    </inline-formula> was 96% and 91%, respectively. This granularity of information is essential for large networks consisting of independent sensors that are separated by significant spatial distances, where the cost of time and travel for sensor replacement or maintenance is substantial.</p>
            </sec>
            <sec id="sec8">
                <title>Replication study</title>
                <p>This section explores the anomaly detection performance of GDN/GDN+ across multiple simulated data sets. The approach is as follows. First, ten new sets of spatial sampling locations are created, and for each set a Gaussian random field evolving over time is simulated, as per 
                    <xref ref-type="disp-formula" rid="e14">Equation 3</xref>. For each set of locations, we again consider both the Euclidean spatial characterisation (
                    <italic toggle="yes">SimEuc</italic>), and the river network spatial characterisation (
                    <italic toggle="yes">SimRiver</italic>), yielding a total of 20 benchmark data sets. In the first case, we use the Euclidean covariance model in 
                    <xref ref-type="disp-formula" rid="e17">Equation 4</xref>, parameterised by, 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msup>
                                <mml:mi>&#x03c3;</mml:mi>
                                <mml:mn>2</mml:mn>
                            </mml:msup>
                            <mml:mo>&#x2208;</mml:mo>
                            <mml:mfenced close="]" open="[" separators=",">
                                <mml:mn>1</mml:mn>
                                <mml:mn>5</mml:mn>
                            </mml:mfenced>
                        </mml:math>
                    </inline-formula>, and, 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>&#x03b1;</mml:mi>
                            <mml:mo>&#x2208;</mml:mo>
                            <mml:mfenced close="]" open="[" separators=",">
                                <mml:mn>5</mml:mn>
                                <mml:mn>15</mml:mn>
                            </mml:mfenced>
                        </mml:math>
                    </inline-formula>, with independent noise parameter, 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msubsup>
                                <mml:mi>&#x03c3;</mml:mi>
                                <mml:mn>0</mml:mn>
                                <mml:mn>2</mml:mn>
                            </mml:msubsup>
                            <mml:mo>&#x2208;</mml:mo>
                            <mml:mfenced close="]" open="[" separators=",">
                                <mml:mn>0</mml:mn>
                                <mml:mn>1</mml:mn>
                            </mml:mfenced>
                        </mml:math>
                    </inline-formula>, and regression parameters 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mi>&#x03b2;</mml:mi>
                                <mml:mn>0</mml:mn>
                            </mml:msub>
                            <mml:mo>&#x2208;</mml:mo>
                            <mml:mfenced close="]" open="[" separators=",">
                                <mml:mn>1</mml:mn>
                                <mml:mn>10</mml:mn>
                            </mml:mfenced>
                        </mml:math>
                    </inline-formula>, and, 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mi>&#x03b2;</mml:mi>
                                <mml:mn>1</mml:mn>
                            </mml:msub>
                            <mml:mo>&#x2208;</mml:mo>
                            <mml:mfenced close="]" open="[" separators=",">
                                <mml:mn>1</mml:mn>
                                <mml:mn>10</mml:mn>
                            </mml:mfenced>
                        </mml:math>
                    </inline-formula>, for the linear model in 
                    <xref ref-type="disp-formula" rid="e13">Equation 2</xref>. The values of the parameters are chosen uniformly at random. The Tail-up covariance model in 
                    <xref ref-type="disp-formula" rid="e18">Equation 5</xref> is used in the second case, parameterised as above.</p>
                <p>Then, anomalies are generated with the following parameters: drift 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>&#x03b4;</mml:mi>
                            <mml:mo>&#x2208;</mml:mo>
                            <mml:mfenced close="]" open="[" separators=",">
                                <mml:mn>3</mml:mn>
                                <mml:mn>6</mml:mn>
                            </mml:mfenced>
                        </mml:math>
                    </inline-formula>, variability 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>&#x03b6;</mml:mi>
                            <mml:mo>&#x2208;</mml:mo>
                            <mml:mfenced close="]" open="[" separators=",">
                                <mml:mn>12</mml:mn>
                                <mml:mn>15</mml:mn>
                            </mml:mfenced>
                        </mml:math>
                    </inline-formula>, length of anomalies 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mi>&#x03bb;</mml:mi>
                                <mml:mtext>drift</mml:mtext>
                            </mml:msub>
                            <mml:mo>&#x2208;</mml:mo>
                            <mml:mfenced close="]" open="[" separators=",">
                                <mml:mn>5</mml:mn>
                                <mml:mn>10</mml:mn>
                            </mml:mfenced>
                        </mml:math>
                    </inline-formula>, 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mi>&#x03bb;</mml:mi>
                                <mml:mi>var</mml:mi>
                            </mml:msub>
                            <mml:mo>&#x2208;</mml:mo>
                            <mml:mfenced close="]" open="[" separators=",">
                                <mml:mn>2</mml:mn>
                                <mml:mn>10</mml:mn>
                            </mml:mfenced>
                        </mml:math>
                    </inline-formula>, and the number of anomalies, 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mi>n</mml:mi>
                                <mml:mtext>drift</mml:mtext>
                            </mml:msub>
                            <mml:mo>&#x2208;</mml:mo>
                            <mml:mfenced close="]" open="[" separators=",">
                                <mml:mn>50</mml:mn>
                                <mml:mn>100</mml:mn>
                            </mml:mfenced>
                        </mml:math>
                    </inline-formula>, 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mi>n</mml:mi>
                                <mml:mi>var</mml:mi>
                            </mml:msub>
                            <mml:mo>&#x2208;</mml:mo>
                            <mml:mfenced close="]" open="[" separators=",">
                                <mml:mn>50</mml:mn>
                                <mml:mn>100</mml:mn>
                            </mml:mfenced>
                        </mml:math>
                    </inline-formula> (see 
                    <xref ref-type="fig" rid="B1">Algorithm 1</xref>). Across all simulations, the size of the data set is fixed to have length, 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>T</mml:mi>
                            <mml:mo>=</mml:mo>
                            <mml:mn>4000</mml:mn>
                        </mml:math>
                    </inline-formula>, and number of sensors, 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>n</mml:mi>
                            <mml:mo>=</mml:mo>
                            <mml:mn>40</mml:mn>
                        </mml:math>
                    </inline-formula>.</p>
                <p>
                    <xref ref-type="fig" rid="f6">Figure 6</xref> illustrates the anomaly detection performance for GDN and GDN+, run on each data set with sliding window length 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>w</mml:mi>
                            <mml:mo>=</mml:mo>
                            <mml:mn>3</mml:mn>
                        </mml:math>
                    </inline-formula>, and Top-K hyperparameter 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>K</mml:mi>
                            <mml:mo>=</mml:mo>
                            <mml:mn>5</mml:mn>
                        </mml:math>
                    </inline-formula>. Note that the total number of anomalies can be seen at the bar height of TP+FN. In every scenario, GDN+ reduces the FN count (red line), but at the cost of an increased FP count (orange line). Whether such a trade-off is tolerable depends on how critical it is in practical scenarios that true anomalies are successfully detected. Note that performance varies from one scenario to the next. Nevertheless, despite the simulated datasets being extremely noisy and complex, GDN and GDN+ appear to succeed in anomaly detection when other methods cannot.</p>
                <fig fig-type="figure" id="f6" orientation="portrait" position="float">
                    <label>Figure 6. </label>
                    <caption>
                        <title>Anomaly detection performance of GDN, and GDN+ across the twenty simulated benchmarks in the replication study.</title>
                        <p>Note that GDN+ consistently decreases false negatives (red line) in every case, but also increases false positives (orange line).</p>
                    </caption>
                    <graphic id="gr6" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/149233/d8a1db05-aa51-4579-95b0-af83fdacf520_figure6.gif"/>
                </fig>
            </sec>
            <sec id="sec9">
                <title>Case study: Herbert river</title>
                <p>This case study examines water-level data collected from eight sites located across the Herbert river, a major river system located in the tropical region of Australia, as shown in 
                    <xref ref-type="fig" rid="f7">Figure 7</xref>. The time series data is highly non-stationary, characterised by river 
                    <italic toggle="yes">events</italic> caused by abnormal rainfall patterns, with some coastal sites exhibiting shorter periodicity trends which can be attributed to tidal patterns, see 
                    <xref ref-type="fig" rid="f8">Figure 8</xref>. The spatial relationships are complex, and depend on the surrounding water catchment areas, spatial rainfall patterns, dams, and other impediments. In-situ sensors are prone to various anomalies such as battery failure, biofouling (accumulation of microorganisms, plants, algae, or small animals), and damage. In some cases, anomalies can manifest as the absence of a water event (i.e., flatlining) rather than the presence of abnormal time series patterns (i.e., spikes, variability, drift). In real-world scenarios, anomalies can persist for extended periods, and resolving them may require traveling to remote locations to inspect and repair sensors. As seen in 
                    <xref ref-type="fig" rid="f8">Figure 8</xref>, anomalies at time 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>t</mml:mi>
                        </mml:math>
                    </inline-formula> are largely attributed to Sensor 4, which was out of water for long periods of time.</p>
                <fig fig-type="figure" id="f7" orientation="portrait" position="float">
                    <label>Figure 7. </label>
                    <caption>
                        <title>The Herbert river system and sensor locations.</title>
                    </caption>
                    <graphic id="gr7" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/149233/d8a1db05-aa51-4579-95b0-af83fdacf520_figure7.gif"/>
                </fig>
                <fig fig-type="figure" id="f8" orientation="portrait" position="float">
                    <label>Figure 8. </label>
                    <caption>
                        <title>Test data of water level collected from sensors across the Herbert river (light blue), and the corresponding predictions from the GDN model (dark blue).</title>
                        <p>Actual anomalies (red dots) are shown, along with the predicted anomalies (orange line) on the bottom.</p>
                    </caption>
                    <graphic id="gr8" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/149233/d8a1db05-aa51-4579-95b0-af83fdacf520_figure8.gif"/>
                </fig>
                <p>The Herbert river is a challenging data set for all of the anomaly detection models, both because of the sparse placement of sensors across the network (i.e., fewer sensors at greater distances apart, resulting in weaker spatial relationships) and because the test set contains a high proportion of anomalies (58%; see 
                    <xref ref-type="table" rid="T1">Table 1</xref>). GDN applied to the real-world dataset yields a recall of 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mn>29.2</mml:mn>
                            <mml:mo>%</mml:mo>
                        </mml:math>
                    </inline-formula>, with GDN+ improving recall to 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mn>34.8</mml:mn>
                            <mml:mo>%</mml:mo>
                        </mml:math>
                    </inline-formula>, see 
                    <xref ref-type="table" rid="T3">Table 3</xref>. Model performance suffers primarily due to the failure to detect the large anomaly spanning across 2022-01-10 in 
                    <xref ref-type="fig" rid="f8">Figure 8</xref>. This may be attributed to the learned graph relationships being characterised by river events in the training data, and without such events, it is difficult to identify when a sensor is flat-lining. However, the model successfully identified anomalies spanning across 2021-12-27 and 2022-01-03, which coincided with river events.</p>
                <table-wrap id="T3" orientation="portrait" position="float">
                    <label>Table 3. </label>
                    <caption>
                        <title>Anomaly detection performance in terms of recall (%), precision (%), accuracy (%), and specificity (%) of GDN and its variants and baseline methods for the Herbert river case study.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">Data</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Model</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Rec</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Prec</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Acc</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Spec</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="6" valign="top">Herbert</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">HDoutliers</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>0.0</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>0.0</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="top">42.0</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">100.0</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">ARIMA</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">30.5</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">62.9</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">51.3</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">77.5</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">DeepAnT</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>1.6</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="top">39.7</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">44.0</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">97.0</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">GDN</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">29.2</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">60.6</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">50.1</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">76.2</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">GDN+</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <underline>34.8</underline>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="top">59.2</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">50.4</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">70.0</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">GDN++</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>100.0</bold>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="top">80.7</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">86.7</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">70.0</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
                <p>
                    <xref ref-type="fig" rid="f9">Figure 9</xref> shows the learned graph adjacency matrix, 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>A</mml:mi>
                        </mml:math>
                    </inline-formula>. Sensor 1, separated geographically by a large distance from the other sensors, has weaker relationships with the other nodes. Interestingly, the attention weights indicate that sensor 3 is strongly influenced by sensor 6, with tidal patterns from sensor 6 being evident in the predictions of sensor 3. Large error scores indicating an anomaly spanning across 2021-12-27 are primarily observed in sensor 2, impacted by anomalous sensors 3 and 4, where the predicted values are low. However, due to the small network of sensors in this case study, it is difficult to determine which sensor had the anomaly.</p>
                <fig fig-type="figure" id="f9" orientation="portrait" position="float">
                    <label>Figure 9. </label>
                    <caption>
                        <title>Graph with learned adjacency matrix, 
                            <inline-formula>
                                <mml:math display="inline">
                                    <mml:mi>A</mml:mi>
                                </mml:math>
                            </inline-formula>.</title>
                        <p>The edge weightings are determined by 
                            <inline-formula>
                                <mml:math display="inline">
                                    <mml:msub>
                                        <mml:mi>&#x03b1;</mml:mi>
                                        <mml:mi mathvariant="italic">ij</mml:mi>
                                    </mml:msub>
                                </mml:math>
                            </inline-formula>, which indicates attention and depends on model input, 
                            <inline-formula>
                                <mml:math display="inline">
                                    <mml:msup>
                                        <mml:mi mathvariant="normal">X</mml:mi>
                                        <mml:mfenced close=")" open="(">
                                            <mml:mi>t</mml:mi>
                                        </mml:mfenced>
                                    </mml:msup>
                                </mml:math>
                            </inline-formula>.</p>
                    </caption>
                    <graphic id="gr9" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/149233/d8a1db05-aa51-4579-95b0-af83fdacf520_figure9.gif"/>
                </fig>
                <p>Adjusting the threshold in the GDN+ model can only enhance performance to a limited extent, since some anomalies may have insignificant error scores due to the underlying time series model. For instance, the anomaly spanning across 2022-01-10 has negligible error scores because the prediction model was performing well and no river events had occurred, making it challenging to detect the flat-lining type of anomaly in this particular scenario. Therefore, relying solely on the model may not be sufficient in practice, and we recommend implementing some basic expert rules. We introduce another model variant GDN++, by applying a simple filter to the time series ensuring that all values are positive, used in conjunction with adjusting the threshold calculation (GDN+). That is, an anomaly at time, 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>t</mml:mi>
                        </mml:math>
                    </inline-formula>, on sensor, 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>i</mml:mi>
                        </mml:math>
                    </inline-formula>, is flagged if, 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mo>max</mml:mo>
                            <mml:mfenced close="}" open="{" separators=",">
                                <mml:mrow>
                                    <mml:mi mathvariant="double-struck">I</mml:mi>
                                    <mml:mfenced close="}" open="{">
                                        <mml:mrow>
                                            <mml:msubsup>
                                                <mml:mi>y</mml:mi>
                                                <mml:mi>i</mml:mi>
                                                <mml:mfenced close=")" open="(">
                                                    <mml:mi>t</mml:mi>
                                                </mml:mfenced>
                                            </mml:msubsup>
                                            <mml:mo>&lt;</mml:mo>
                                            <mml:mn>0</mml:mn>
                                        </mml:mrow>
                                    </mml:mfenced>
                                </mml:mrow>
                                <mml:mrow>
                                    <mml:msub>
                                        <mml:mi>A</mml:mi>
                                        <mml:mi>i</mml:mi>
                                    </mml:msub>
                                    <mml:mfenced close=")" open="(">
                                        <mml:mi>t</mml:mi>
                                    </mml:mfenced>
                                </mml:mrow>
                            </mml:mfenced>
                            <mml:mo>=</mml:mo>
                            <mml:mn>1</mml:mn>
                            <mml:mo>.</mml:mo>
                        </mml:math>
                    </inline-formula> GDN++ successfully detects all anomalies.</p>
            </sec>
        </sec>
        <sec id="sec10" sec-type="discussion">
            <title>Discussion</title>
            <p>Multivariate anomaly detection and prediction models for spatio-temporal sensor data have the potential to transform water quality observation, modelling, and management.
                <sup>
                    <xref ref-type="bibr" rid="ref7">7</xref>
                </sup>
                <sup>,</sup>
                <sup>
                    <xref ref-type="bibr" rid="ref8">8</xref>
                </sup> The provision of trustworthy sensor data has four major benefits: 1. It enables the production of finer-scale, reliable and more accurate estimates of sediment and nutrient loads, 2. It provides real-time feedback to landholders and managers, 3. It guides compliance with water quality guidelines, and 4. It allows for the assessment of ecosystem health and the prioritisation of management actions for sustaining aquatic ecosystems. However, technical anomalies in the data provided by the sensors can occur due to factors such as low battery power, biofouling of the probes, and sensor miscalibration. As noted by Ref. 
                <xref ref-type="bibr" rid="ref7">7</xref>, there are a wide variety of anomaly types present within in-situ sensor data (e.g., high-variability, drift, spikes, shifts). Most anomaly detection methods used for water quality applications tend to target specific anomalies, such as sudden spikes or shifts.
                <sup>
                    <xref ref-type="bibr" rid="ref28">28</xref>
                </sup>
                <sup>&#x2013;</sup>
                <sup>
                    <xref ref-type="bibr" rid="ref30">30</xref>
                </sup> Detecting persistent anomalies, such as sensor drift and periods of abnormally high variability, remains very challenging for statistical and machine learning research. Such anomalies are often overlooked by distance and kernel based methods, yet must be detected before the data can be used, because they confound the assessment of status and trends in water quality. Understanding the relationships among, and typical behaviours of, water quality variables and how these differ among climate zones is thus an essential step in distinguishing anomalies from real water quality events.</p>
            <p>We investigated the graph-based neural network model, GDN,
                <sup>
                    <xref ref-type="bibr" rid="ref15">15</xref>
                </sup> for its ability to capture complex interdependencies between different variables in a semi-supervised manner. As such, GDN offered the ability to capture deviations from expected behaviour within a high-dimensional setting. We developed novel benchmarking data sets for subsequence anomaly detection (of variable length and type), with a range of spatio-temporal complexities, inspired by the statistical models recently used for river network data.
                <sup>
                    <xref ref-type="bibr" rid="ref9">9</xref>
                </sup> Results showed that GDN tended to outperform the benchmarks in anomaly detection. We developed a model extension, GDN+, by adjusting the threshold calculation. GDN+ was shown to further improve performance. A replication study with multiple benchmarking data sets demonstrated consistency in these results. Sensor-based thresholds also proved useful in terms of identifying which neighbourhood the anomaly originated from in the simulation study.</p>
            <p>We used a real-world case study of water level in the Herbert river, with non-stationary time series characterised by multiple river events caused by abnormal rainfall patterns. In this case, most of the anomalies appeared as flat-lining, due to the river drying out. Considering an individual time series, such anomalies may not appear obvious, as it is the failure to detect river events (in a multivariate setting) that is indicative of an anomalous sensor. Despite the challenges in the data, GDN+ was shown to successfully detect technical anomalies when river events occurred, and combined with a simple expert-based rule, GDN++, all anomalies were successfully detected.</p>
            <p>There are two recent methodological extensions to the GDN approach. First, the Fused Sparse Autoencoder and Graph Net
                <sup>
                    <xref ref-type="bibr" rid="ref31">31</xref>
                </sup> extends the GDN approach by augmenting the prediction-based loss with a reconstruction-based term arising from the output of a sparse autoencoder, and includes a further extension that allows the individual sensors to have multivariate data. Second, a probabilistic (latent variable) extension is trained using variational inference.
                <sup>
                    <xref ref-type="bibr" rid="ref32">32</xref>
                </sup> Since these were published contemporaneously with the present research, and only the latter provided accompanying research code, these approaches were not considered in the paper. Future extensions of this work could consider incorporating the above methodological extensions, as well as further developing the existing software package.</p>
            <p>Other applications could consider the separation of river events from technical anomalies. In terms of interpretability, since the prediction model aggregates feature data from neighbouring sensors, anomalous data can affect the prediction of any other sensor within the neighbourhood. Therefore, large error scores originating from one sensor anomaly can infiltrate through to the entire neighbourhood, and impair the ability to attribute an anomaly to a sensor. The extensive body of literature on signal processing for source identification has the potential to inspire future solutions in addressing this issue.
                <sup>
                    <xref ref-type="bibr" rid="ref33">33</xref>
                </sup>
                <sup>,</sup>
                <sup>
                    <xref ref-type="bibr" rid="ref34">34</xref>
                </sup>
            </p>
            <p>In summary, this work extends and examines the practicality of the GDN approach when applied to an environmental monitoring application of major international concern, with complex spatial and temporal interdependencies. Successfully addressing the challenge of anomaly detection in such settings can facilitate the wider adoption of in-situ sensors and could revolutionise the monitoring and management of air, soil, and water.</p>
        </sec>
        <sec id="sec11" sec-type="conclusions">
            <title>Conclusions</title>
            <p>This work studied the application of Graph Deviation Network (GDN) based approaches for anomaly detection in the challenging setting of river network data, which often feature sensors that generate high-dimensional data with complex spatio-temporal relationships. We introduced alternative detection criteria for the model (GDN+/GDN++), and their practicality was explored on both real and simulated benchmark data. The findings indicated that GDN and its variants were effective in correctly (and conservatively) identifying anomalies. Benchmark data were generated via an approach that was also introduced in this paper, along with open-source software, and may prove useful in the development and testing of other anomaly-detection methods. In short, we found that graph neural network based approaches to anomaly detection offer a flexible framework, able to capture and model non-standard, highly dynamic, complex relationships over space and time, with the ability to flag a variety of anomaly types. However, the task of anomaly detection on river network sensor data remains a considerable challenge.</p>
        </sec>
        <sec id="sec12">
            <title>Author contributions</title>
            <p>KB; Investigation, Formal Analysis, Methodology, Software, Validation, Visualisation, Writing &#x2013; Original Draft Preparation. RS; Methodology, Supervision, Writing &#x2013; Review &amp; Editing. KM; Funding Acquisition, Conceptualisation, Supervision, Writing &#x2013; Review &amp; Editing. ES; Data Curation, Visualisation, Writing &#x2013; Review &amp; Editing.</p>
        </sec>
    </body>
    <back>
        <sec id="sec13" sec-type="data-availability">
            <title>Data availability</title>
            <p>Zenodo. Water level across the Herbert river. DOI: 
                <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.5281/zenodo.8053358">10.5281/zenodo.8053358</ext-link>.</p>
            <p>This project contains the following underlying data:
                <list list-type="bullet">
                    <list-item>
                        <label>&#x2022;</label>
                        <p>herbert_train.csv (sensor data).</p>
                    </list-item>
                    <list-item>
                        <label>&#x2022;</label>
                        <p>herbert_test.csv (sensor data with sensor anomaly flags).
</p>
                    </list-item>
                </list>
            </p>
            <p>Data are available under the terms of the 
                <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution 4.0 International license</ext-link> (CC-BY 4.0).</p>
        </sec>
        <sec id="sec14">
            <title>Software availability</title>
            <list list-type="bullet">
                <list-item>
                    <label>&#x2022;</label>
                    <p>Source code available from 
                        <ext-link ext-link-type="uri" xlink:href="https://github.com/KatieBuc/gnnad/releases">https://github.com/KatieBuc/gnnad/releases</ext-link>
                    </p>
                </list-item>
                <list-item>
                    <label>&#x2022;</label>
                    <p>Archived source code at time of publication: Zenodo. 
                        <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.5281/zenodo.8111760">10.5281/zenodo.8111760</ext-link>
                    </p>
                </list-item>
                <list-item>
                    <label>&#x2022;</label>
                    <p>License: MIT License
</p>
                </list-item>
            </list>
        </sec>
        <ack>
            <title>Acknowledgements</title>
            <p>&#x0141;ukasz Mentel; Software. Cameron Roberts; Data Curation. James McGree; Writing &#x2013; Review &amp; Editing. A version of this manuscript has been uploaded onto the pre-print server, 
                <ext-link ext-link-type="uri" xlink:href="https://arxiv.org">arxiv.org</ext-link> as 
                <ext-link ext-link-type="uri" xlink:href="https://arxiv.org/abs/2304.09367">https://arxiv.org/abs/2304.09367</ext-link>.</p>
        </ack>
        <ref-list>
            <title>References</title>
            <ref id="ref1">
                <label>1</label>
                <mixed-citation publication-type="other">
                    <article-title>UN General Assembly Transforming our World: The 2030 Agenda for Sustainable Development.</article-title>
                    <source>

                        <italic toggle="yes">United Nations.</italic>
</source>
                    <year>2015</year>. (A/RES/70/1).</mixed-citation>
            </ref>
            <ref id="ref2">
                <label>2</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Marinho e Silva</surname>
                            <given-names>G</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Campos</surname>
                            <given-names>DF</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Brasil</surname>
                            <given-names>JAT</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Advances in technological research for online and in situ water quality monitoring&#x2014;A review.</article-title>
                    <source>

                        <italic toggle="yes">Sustainability.</italic>
</source>
                    <year>2022</year>;<volume>14</volume>(<issue>9</issue>):<fpage>5059</fpage>.
                    <pub-id pub-id-type="doi">10.3390/su14095059</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref3">
                <label>3</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Ritchie</surname>
                            <given-names>JC</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Zimba</surname>
                            <given-names>PV</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Everitt</surname>
                            <given-names>JH</given-names>
                        </name>
</person-group>:
                    <article-title>Remote sensing techniques to assess water quality.</article-title>
                    <source>

                        <italic toggle="yes">Photogramm. Eng. Remote Sens.</italic>
</source>
                    <year>2003</year>;<volume>69</volume>(<issue>6</issue>):<fpage>695</fpage>&#x2013;<lpage>704</lpage>.
                    <pub-id pub-id-type="doi">10.14358/PERS.69.6.695</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref4">
                <label>4</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Kang</surname>
                            <given-names>JM</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Shekhar</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Henjum</surname>
                            <given-names>M</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <chapter-title>Discovering teleconnected flow anomalies: A relationship analysis of dynamic neighborhoods (RAD) approach.</chapter-title>
                    <source>

                        <italic toggle="yes">International Symposium on Advances in Spatial and Temporal Databases.</italic>
</source>
                    <publisher-name>Springer</publisher-name>;<year>2009</year>; pp.<fpage>44</fpage>&#x2013;<lpage>61</lpage>.</mixed-citation>
            </ref>
            <ref id="ref5">
                <label>5</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Bl&#x00e1;zquez-Garc&#x00ed;a</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Conde</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Mori</surname>
                            <given-names>U</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>A review on outlier/anomaly detection in time series data.</article-title>
                    <source>

                        <italic toggle="yes">ACM Computing Surveys (CSUR).</italic>
</source>
                    <year>2021</year>;<volume>54</volume>(<issue>3</issue>):<fpage>1</fpage>&#x2013;<lpage>33</lpage>.
                    <pub-id pub-id-type="doi">10.1145/3444690</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref6">
                <label>6</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Bourgeois</surname>
                            <given-names>W</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Romain</surname>
                            <given-names>A-C</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Nicolas</surname>
                            <given-names>J</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>The use of sensor arrays for environmental monitoring: interests and limitations.</article-title>
                    <source>

                        <italic toggle="yes">J. Environ. Monit.</italic>
</source>
                    <year>2003</year>;<volume>5</volume>(<issue>6</issue>):<fpage>852</fpage>&#x2013;<lpage>860</lpage>.
                    <pub-id pub-id-type="doi">10.1039/b307905h</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref7">
                <label>7</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Leigh</surname>
                            <given-names>C</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Alsibai</surname>
                            <given-names>O</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Hyndman</surname>
                            <given-names>RJ</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>A framework for automated anomaly detection in high frequency water-quality data from in situ sensors.</article-title>
                    <source>

                        <italic toggle="yes">Sci. Total Environ.</italic>
</source>
                    <year>2019</year>;<volume>664</volume>:<fpage>885</fpage>&#x2013;<lpage>898</lpage>.
                    <pub-id pub-id-type="pmid">30769312</pub-id>
                    <pub-id pub-id-type="doi">10.1016/j.scitotenv.2019.02.085</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref8">
                <label>8</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Ver Hoef</surname>
                            <given-names>JM</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Peterson</surname>
                            <given-names>EE</given-names>
                        </name>
</person-group>:
                    <article-title>A moving average approach for spatial statistical models of stream networks.</article-title>
                    <source>

                        <italic toggle="yes">J. Am. Stat. Assoc.</italic>
</source>
                    <year>2010</year>;<volume>105</volume>(<issue>489</issue>):<fpage>6</fpage>&#x2013;<lpage>18</lpage>.
                    <pub-id pub-id-type="doi">10.1198/jasa.2009.ap08248</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref9">
                <label>9</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Santos-Fernandez</surname>
                            <given-names>E</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ver Hoef</surname>
                            <given-names>JM</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Peterson</surname>
                            <given-names>EE</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Bayesian spatio-temporal models for stream networks.</article-title>
                    <source>

                        <italic toggle="yes">Comput. Stat. Data Anal.</italic>
</source>
                    <year>2022</year>;<volume>170</volume>:<fpage>107446</fpage>.
                    <pub-id pub-id-type="doi">10.1016/j.csda.2022.107446</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref10">
                <label>10</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Porter</surname>
                            <given-names>JH</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Hanson</surname>
                            <given-names>PC</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Lin</surname>
                            <given-names>C-C</given-names>
                        </name>
</person-group>:
                    <article-title>Staying afloat in the sensor data deluge.</article-title>
                    <source>

                        <italic toggle="yes">Trends Ecol. Evol.</italic>
</source>
                    <year>2012</year>;<volume>27</volume>(<issue>2</issue>):<fpage>121</fpage>&#x2013;<lpage>129</lpage>.
                    <pub-id pub-id-type="pmid">22206661</pub-id>
                    <pub-id pub-id-type="doi">10.1016/j.tree.2011.11.009</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref11">
                <label>11</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Rodriguez-Perez</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Leigh</surname>
                            <given-names>C</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Liquet</surname>
                            <given-names>B</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Detecting technical anomalies in high-frequency water-quality data using artificial neural networks.</article-title>
                    <source>

                        <italic toggle="yes">Environ. Sci. Technol.</italic>
</source>
                    <year>2020</year>;<volume>54</volume>(<issue>21</issue>):<fpage>13719</fpage>&#x2013;<lpage>13730</lpage>.
                    <pub-id pub-id-type="pmid">32856893</pub-id>
                    <pub-id pub-id-type="doi">10.1021/acs.est.0c04069</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref12">
                <label>12</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Wang</surname>
                            <given-names>X</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Lin</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Patel</surname>
                            <given-names>N</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Exact variable-length anomaly detection algorithm for univariate and multivariate time series.</article-title>
                    <source>

                        <italic toggle="yes">Data Min. Knowl. Disc.</italic>
</source>
                    <year>2018</year>;<volume>32</volume>:<fpage>1806</fpage>&#x2013;<lpage>1844</lpage>.
                    <pub-id pub-id-type="doi">10.1007/s10618-018-0569-7</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref13">
                <label>13</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Min</surname>
                            <given-names>H</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Feng</surname>
                            <given-names>X</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ji</surname>
                            <given-names>Z</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>A novel computational approach for discord search with local recurrence rates in multivariate time series.</article-title>
                    <source>

                        <italic toggle="yes">Inf. Sci.</italic>
</source>
                    <year>2019</year>;<volume>477</volume>:<fpage>220</fpage>&#x2013;<lpage>233</lpage>.
                    <pub-id pub-id-type="doi">10.1016/j.ins.2018.10.047</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref14">
                <label>14</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Munir</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Siddiqui</surname>
                            <given-names>SA</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Dengel</surname>
                            <given-names>A</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>DeepAnT: A deep learning approach for unsupervised anomaly detection in time series.</article-title>
                    <source>

                        <italic toggle="yes">IEEE Access.</italic>
</source>
                    <year>2018</year>;<volume>7</volume>:<fpage>1991</fpage>&#x2013;<lpage>2005</lpage>.</mixed-citation>
            </ref>
            <ref id="ref15">
                <label>15</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Deng</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Hooi</surname>
                            <given-names>B</given-names>
                        </name>
</person-group>:
                    <article-title>Graph neural network-based anomaly detection in multivariate time series.</article-title>
                    <source>

                        <italic toggle="yes">Proceedings of the AAAI Conference on Artificial Intelligence.</italic>
</source>
                    <year>2021</year>; vol.<volume>35</volume>: pp.<fpage>4027</fpage>&#x2013;<lpage>4035</lpage>.</mixed-citation>
            </ref>
            <ref id="ref16">
                <label>16</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Schmidl</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Wenig</surname>
                            <given-names>P</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Papenbrock</surname>
                            <given-names>T</given-names>
                        </name>
</person-group>:
                    <article-title>Anomaly detection in time series: A comprehensive evaluation.</article-title>
                    <source>

                        <italic toggle="yes">Proc. VLDB Endow.</italic>
</source>
                    <year>2022</year>;<volume>15</volume>(<issue>9</issue>):<fpage>1779</fpage>&#x2013;<lpage>1797</lpage>.
                    <pub-id pub-id-type="doi">10.14778/3538598.3538602</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref17">
                <label>17</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Wilkinson</surname>
                            <given-names>L</given-names>
                        </name>
</person-group>:
                    <article-title>Visualizing Big Data outliers through distributed aggregation.</article-title>
                    <source>

                        <italic toggle="yes">IEEE Trans. Vis. Comput. Graph.</italic>
</source>
                    <year>2017</year>;<volume>24</volume>(<issue>1</issue>):<fpage>256</fpage>&#x2013;<lpage>266</lpage>.
                    <pub-id pub-id-type="pmid">28866555</pub-id>
                    <pub-id pub-id-type="doi">10.1109/TVCG.2017.2744685</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref18">
                <label>18</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Goldstein</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Uchida</surname>
                            <given-names>S</given-names>
                        </name>
</person-group>:
                    <article-title>A comparative evaluation of unsupervised anomaly detection algorithms for multivariate data.</article-title>
                    <source>

                        <italic toggle="yes">PLoS One.</italic>
</source>
                    <year>2016</year>;<volume>11</volume>(<issue>4</issue>):<fpage>e0152173</fpage>.
                    <pub-id pub-id-type="pmid">27093601</pub-id>
                    <pub-id pub-id-type="doi">10.1371/journal.pone.0152173</pub-id>
                    <pub-id pub-id-type="pmcid">PMC4836738</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref19">
                <label>19</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Nassif</surname>
                            <given-names>AB</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Talib</surname>
                            <given-names>MA</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Nasir</surname>
                            <given-names>Q</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Machine learning for anomaly detection: A systematic review.</article-title>
                    <source>

                        <italic toggle="yes">IEEE Access.</italic>
</source>
                    <year>2021</year>;<volume>9</volume>:<fpage>78658</fpage>&#x2013;<lpage>78700</lpage>.
                    <pub-id pub-id-type="doi">10.1109/ACCESS.2021.3083060</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref20">
                <label>20</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Veli&#x010d;kovi&#x0107;</surname>
                            <given-names>P</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Cucurull</surname>
                            <given-names>G</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Casanova</surname>
                            <given-names>A</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Graph attention networks.</article-title>
                    <source>

                        <italic toggle="yes">International Conference on Learning Representations (ICLR).</italic>
</source>
                    <year>2018</year>.</mixed-citation>
            </ref>
            <ref id="ref21">
                <label>21</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Buchhorn</surname>
                            <given-names>K</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Mengersen</surname>
                            <given-names>K</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Santos-Fernandez</surname>
                            <given-names>E</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Bayesian design with sampling windows for complex spatial processes.</article-title>
                    <source>

                        <italic toggle="yes">arXiv preprint arXiv:2206.05369.</italic>
</source>
                    <year>2022</year>.</mixed-citation>
            </ref>
            <ref id="ref22">
                <label>22</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Cressie</surname>
                            <given-names>N</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Frey</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Harch</surname>
                            <given-names>B</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Spatial prediction on a river network.</article-title>
                    <source>

                        <italic toggle="yes">J. Agric. Biol. Environ. Stat.</italic>
</source>
                    <year>2006</year>;<volume>11</volume>(<issue>2</issue>):<fpage>127</fpage>&#x2013;<lpage>150</lpage>.
                    <pub-id pub-id-type="doi">10.1198/108571106X110649</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref23">
                <label>23</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Ver Hoef</surname>
                            <given-names>JM</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Peterson</surname>
                            <given-names>E</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Theobald</surname>
                            <given-names>D</given-names>
                        </name>
</person-group>:
                    <article-title>Spatial statistical models that use flow and stream distance.</article-title>
                    <source>

                        <italic toggle="yes">Environ. Ecol. Stat.</italic>
</source>
                    <year>2006</year>;<volume>13</volume>(<issue>4</issue>):<fpage>449</fpage>&#x2013;<lpage>464</lpage>.
                    <pub-id pub-id-type="doi">10.1007/s10651-006-0022-8</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref24">
                <label>24</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Shreve</surname>
                            <given-names>RL</given-names>
                        </name>
</person-group>:
                    <article-title>Statistical law of stream numbers.</article-title>
                    <source>

                        <italic toggle="yes">J. Geol.</italic>
</source>
                    <year>1966</year>;<volume>74</volume>(<issue>1</issue>):<fpage>17</fpage>&#x2013;<lpage>37</lpage>.
                    <pub-id pub-id-type="doi">10.1086/627137</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref25">
                <label>25</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Peterson</surname>
                            <given-names>DP</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Fausch</surname>
                            <given-names>KD</given-names>
                        </name>
</person-group>:
                    <article-title>Upstream movement by nonnative brook trout (Salvelinus fontinalis) promotes invasion of native cutthroat trout (Oncorhynchus clarki) habitat.</article-title>
                    <source>

                        <italic toggle="yes">Can. J. Fish. Aquat. Sci.</italic>
</source>
                    <year>2003</year>;<volume>60</volume>(<issue>12</issue>):<fpage>1502</fpage>&#x2013;<lpage>1516</lpage>.
                    <pub-id pub-id-type="doi">10.1139/f03-128</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref26">
                <label>26</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Hyndman</surname>
                            <given-names>RJ</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Athanasopoulos</surname>
                            <given-names>G</given-names>
                        </name>
</person-group>:
                    <source>

                        <italic toggle="yes">Forecasting: principles and practice.</italic>
</source>
                    <edition>3rd ed.</edition>
                    <publisher-name>OTexts</publisher-name>;<year>2021</year>.</mixed-citation>
            </ref>
            <ref id="ref27">
                <label>27</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Ver Hoef</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Peterson</surname>
                            <given-names>E</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Clifford</surname>
                            <given-names>D</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>SSN: An R package for spatial statistical modeling on stream networks.</article-title>
                    <source>

                        <italic toggle="yes">J. Stat. Softw.</italic>
</source>
                    <year>2014</year>;<volume>56</volume>:<fpage>1</fpage>&#x2013;<lpage>45</lpage>.</mixed-citation>
            </ref>
            <ref id="ref28">
                <label>28</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Talagala</surname>
                            <given-names>PD</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Hyndman</surname>
                            <given-names>RJ</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Leigh</surname>
                            <given-names>C</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>A feature-based procedure for detecting technical outliers in water-quality data from in situ sensors.</article-title>
                    <source>

                        <italic toggle="yes">Water Resour. Res.</italic>
</source>
                    <year>2019</year>;<volume>55</volume>(<issue>11</issue>):<fpage>8547</fpage>&#x2013;<lpage>8568</lpage>.
                    <pub-id pub-id-type="doi">10.1029/2019WR024906</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref29">
                <label>29</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Hill</surname>
                            <given-names>DJ</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Minsker</surname>
                            <given-names>BS</given-names>
                        </name>
</person-group>:
                    <article-title>Anomaly detection in streaming environmental sensor data: A data-driven modeling approach.</article-title>
                    <source>

                        <italic toggle="yes">Environ. Model Softw.</italic>
</source>
                    <year>2010</year>;<volume>25</volume>(<issue>9</issue>):<fpage>1014</fpage>&#x2013;<lpage>1022</lpage>.
                    <pub-id pub-id-type="doi">10.1016/j.envsoft.2009.08.010</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref30">
                <label>30</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Ba</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>McKenna</surname>
                            <given-names>SA</given-names>
                        </name>
</person-group>:
                    <article-title>Water quality monitoring with online change-point detection methods.</article-title>
                    <source>

                        <italic toggle="yes">J. Hydroinf.</italic>
</source>
                    <year>2015</year>;<volume>17</volume>(<issue>1</issue>):<fpage>7</fpage>&#x2013;<lpage>19</lpage>.
                    <pub-id pub-id-type="doi">10.2166/hydro.2014.126</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref31">
                <label>31</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Han</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Woo</surname>
                            <given-names>SS</given-names>
                        </name>
</person-group>:
                    <article-title>Learning sparse latent graph representations for anomaly detection in multivariate time series.</article-title>
                    <source>

                        <italic toggle="yes">Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining.</italic>
</source>
                    <year>2022</year>; pp.<fpage>2977</fpage>&#x2013;<lpage>2986</lpage>.</mixed-citation>
            </ref>
            <ref id="ref32">
                <label>32</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Chen</surname>
                            <given-names>W</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Tian</surname>
                            <given-names>L</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Chen</surname>
                            <given-names>B</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <chapter-title>Deep Variational Graph Convolutional Recurrent Network for Multivariate Time Series Anomaly Detection.</chapter-title>
                    <source>

                        <italic toggle="yes">International Conference on Machine Learning.</italic>
</source>
                    <publisher-name>PMLR</publisher-name>;<year>2022</year>; pp.<fpage>3621</fpage>&#x2013;<lpage>3633</lpage>.</mixed-citation>
            </ref>
            <ref id="ref33">
                <label>33</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Scharf</surname>
                            <given-names>LL</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Demeure</surname>
                            <given-names>C</given-names>
                        </name>
</person-group>:
                    <source>

                        <italic toggle="yes">Statistical signal processing: detection, estimation, and time series analysis.</italic>
</source>
                    <publisher-name>Prentice Hall</publisher-name>;<year>1991</year>.</mixed-citation>
            </ref>
            <ref id="ref34">
                <label>34</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Telford</surname>
                            <given-names>WM</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Geldart</surname>
                            <given-names>LP</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Sheriff</surname>
                            <given-names>RE</given-names>
                        </name>
</person-group>:
                    <source>

                        <italic toggle="yes">Applied geophysics.</italic>
</source>
                    <publisher-name>Cambridge University Press</publisher-name>;<year>1990</year>.</mixed-citation>
            </ref>
        </ref-list>
        <fn-group content-type="footnotes">
            <fn id="fn1">
                <label>

                    <sup>1</sup>
                </label>
                <p>

                    <italic toggle="yes">In-situ</italic> refers to an instrument in direct contact with the medium of observation.</p>
            </fn>
            <fn id="fn2">
                <label>

                    <sup>2</sup>
                </label>
                <p>It seems that the authors of Ref. 
                    <xref ref-type="bibr" rid="ref15">15</xref> mistakenly denote this addition as concatenation in the paper; however, their corresponding reference code computes addition.</p>
            </fn>
        </fn-group>
    </back>
    <sub-article article-type="reviewer-report" id="report208772">
        <front-stub>
            <article-id pub-id-type="doi">10.5256/f1000research.149233.r208772</article-id>
            <title-group>
                <article-title>Reviewer response for version 1</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author">
                    <name>
                        <surname>Ara&#x00fa;jo Dias</surname>
                        <given-names>Maur&#x00ed;cio</given-names>
                    </name>
                    <xref ref-type="aff" rid="r208772a1">1</xref>
                    <role>Referee</role>
                </contrib>
                <aff id="r208772a1">
                    <label>1</label>Department of Mathematics and Computer Science, School of Sciences and Technology, Universidade Estadual Paulista, S&#x00e3;o Paulo, State of S&#x00e3;o Paulo, Brazil</aff>
            </contrib-group>
            <author-notes>
                <fn fn-type="conflict">
                    <p>
                        <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>20</day>
                <month>10</month>
                <year>2023</year>
            </pub-date>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2023 Ara&#x00fa;jo Dias M</copyright-statement>
                <copyright-year>2023</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access peer review report distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <related-article ext-link-type="doi" id="relatedArticleReport208772" related-article-type="peer-reviewed-article" xlink:href="10.12688/f1000research.136097.1"/>
            <custom-meta-group>
                <custom-meta>
                    <meta-name>recommendation</meta-name>
                    <meta-value>approve-with-reservations</meta-value>
                </custom-meta>
            </custom-meta-group>
        </front-stub>
        <body>
            <p>
                <list list-type="order">
                    <list-item>
                        <p>In recent years, there has been an increasing interest in distinguishing anomaly detection from outlier detection (Chandola 
                            <italic>et al.</italic>, 2009
                            <sup>
                                <xref ref-type="bibr" rid="rep-ref-208772-1">1</xref>
                            </sup>; Chandola 
                            <italic>et al.</italic>, 2012
                            <sup>
                                <xref ref-type="bibr" rid="rep-ref-208772-2">2</xref>
                            </sup>; Kittler 
                            <italic>et al.</italic>, 2014
                            <sup>
                                <xref ref-type="bibr" rid="rep-ref-208772-3">3</xref>
                            </sup>; Dias 
                            <italic>et al.</italic>, 2019
                            <sup>
                                <xref ref-type="bibr" rid="rep-ref-208772-4">4</xref>
                            </sup>; Dias 
                            <italic>et al.</italic>, 2022
                            <sup>
                                <xref ref-type="bibr" rid="rep-ref-208772-5">5</xref>
                            </sup>). According to the scientific literature, the former differs from the latter by categorizing outliers based on some taxonomy, in addition to just detecting them. In 2009 and 2010, a small taxonomy was used in Chandola 
                            <italic>et al. </italic>(2009
                            <sup>
                                <xref ref-type="bibr" rid="rep-ref-208772-1">1</xref>
                            </sup>) and Chandola
                            <italic> et al. </italic>(2012
                            <sup>
                                <xref ref-type="bibr" rid="rep-ref-208772-2">2</xref>
                            </sup>). In 2014, Kittler&#x2019;s Taxonomy, a more complete taxonomy, was introduced in Kittler 
                            <italic>et al.</italic> (2014
                            <sup>
                                <xref ref-type="bibr" rid="rep-ref-208772-3">3</xref>
                            </sup>). In 2020, Kittler&#x2019;s Taxonomy was used as a basis to detect anomalies (high variability) mainly in rivers and in a spatial context, as described in Dias 
                            <italic>et al.</italic> (2019
                            <sup>
                                <xref ref-type="bibr" rid="rep-ref-208772-4">4</xref>
                            </sup>). In 2022, a detection of anomalies (drift) based on Kittler&#x2019;s Taxonomy, in a river and mainly in a temporal context, was described in Dias 
                            <italic>et al.</italic> (2022
                            <sup>
                                <xref ref-type="bibr" rid="rep-ref-208772-5">5</xref>
                            </sup>).</p>
                        <p> </p>
                        <p> The reviewed article, which presents a graph neural network-based anomaly detection for river network systems, is in agreement with the aforementioned papers. In the reviewed article, the anomaly detection process deals with the complexity and variability of the data from spatio-temporal relationships between sensors for accurate and continuous monitoring of river water quality. Another positive aspect of the reviewed article is that it actually describes anomaly detection, in contrast to many articles present in the scientific literature that describe outlier detection despite using the term anomaly detection. Moreover, the work is clearly and accurately presented. A negative aspect of the reviewed article is that it could have had more references on this distinction and on anomaly taxonomies. It would also be interesting to highlight at the beginning of the article which taxonomy the authors relied on to categorize anomalies as being of the following types: high-variability, drift, spikes, and shifts (ref. 7 cited by the article maybe?).</p>
                        <p> </p>
                    </list-item>
                    <list-item>
                        <p>In general, the study design is appropriate. My only suggestions are, please: (1) To add a north arrow and scale bar to Figures 1, 2, and 7. (2) To consider presenting the algorithm on page 9 (Algorithm 2, maybe?) in the same style in which Algorithm 1 was presented on page 8. The work is technically sound.</p>
                        <p> </p>
                    </list-item>
                    <list-item>
                        <p>There are sufficient details of methods and analysis provided to allow replication by others. The reviewed article provides researchers with many mathematical definitions regarding the steps of the methodology used in the work. Those mathematical definitions allow researchers to replicate the work more accurately. The analysis process is supported by a number of real and simulated benchmark data.</p>
                        <p> </p>
                    </list-item>
                    <list-item>
                        <p>The statistical analysis and its interpretation are appropriate since the work validated its results based on well-established metrics, as follows: (1) The quantity of truly relevant results was measured using Recall. (2) The efficiency of results was measured using Accuracy. (3) The relevancy of results was measured using Precision. (4) The ratio of true negatives to all negative outcomes was measured using Specificity.</p>
                        <p> </p>
                    </list-item>
                    <list-item>
                        <p>The source data and software underlying the results of this work are all available on the Internet to ensure full reproducibility. The authors add &#x201c;Data Availability&#x201d; and &#x201c;Software Availability&#x201d; subsections to the end of the paper. My only suggestion is to distinguish and highlight &#x201c;real&#x201d; and &#x201c;simulated&#x201d; benchmark data in the subsection &#x201c;Data Availability&#x201d;.</p>
                        <p> </p>
                    </list-item>
                    <list-item>
                        <p>The conclusions drawn are adequately supported by the results. However, I would like to highlight a certain feature present in the reviewed article, although I do not particularly consider it a flaw. The contents of the &#x201c;Discussion&#x201d; and &#x201c;Conclusions&#x201d; sections of the reviewed article are most commonly found in the scientific literature merged to form a broader &#x201c;Conclusions&#x201d; section than that presented in the reviewed article. For example, suggestions for future work are normally part of the &#x201c;Conclusions&#x201d;, but in the reviewed article they appear in the &#x201c;Discussion&#x201d; (in a scientific article, it seems a bit strange to &#x201c;discuss&#x201d; something that has not yet been done). The scientific literature usually discusses the results found. This is done in the reviewed article, but merged with the &#x201c;Results&#x201d; section, rather than in the &#x201c;Discussion&#x201d;.</p>
                    </list-item>
                </list>
            </p>
            <p>Is the work clearly and accurately presented and does it cite the current literature?</p>
            <p>Partly</p>
            <p>If applicable, is the statistical analysis and its interpretation appropriate?</p>
            <p>Yes</p>
            <p>Are all the source data underlying the results available to ensure full reproducibility?</p>
            <p>Yes</p>
            <p>Is the study design appropriate and is the work technically sound?</p>
            <p>Yes</p>
            <p>Are the conclusions drawn adequately supported by the results?</p>
            <p>Yes</p>
            <p>Are sufficient details of methods and analysis provided to allow replication by others?</p>
            <p>Yes</p>
            <p>Reviewer Expertise:</p>
            <p>NA</p>
            <p>I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard, however I have significant reservations, as outlined above.</p>
        </body>
        <back>
            <ref-list>
                <title>References</title>
                <ref id="rep-ref-208772-1">
                    <label>1</label>
                    <mixed-citation publication-type="journal">
                        <person-group person-group-type="author"/>:
                        <article-title>Anomaly detection</article-title>.
                        <source>
                            <italic>ACM Computing Surveys</italic>
                        </source>.<year>2009</year>;<volume>41</volume>(<issue>3</issue>) :
                        <elocation-id>10.1145/1541880.1541882</elocation-id>
                        <fpage>1</fpage>-<lpage>58</lpage>
                        <pub-id pub-id-type="doi">10.1145/1541880.1541882</pub-id>
                    </mixed-citation>
                </ref>
                <ref id="rep-ref-208772-2">
                    <label>2</label>
                    <mixed-citation publication-type="journal">
                        <person-group person-group-type="author"/>:
                        <article-title>Anomaly Detection for Discrete Sequences: A Survey</article-title>.
                        <source>
                            <italic>IEEE Transactions on Knowledge and Data Engineering</italic>
                        </source>.<year>2012</year>;<volume>24</volume>(<issue>5</issue>) :
                        <elocation-id>10.1109/TKDE.2010.235</elocation-id>
                        <fpage>823</fpage>-<lpage>839</lpage>
                        <pub-id pub-id-type="doi">10.1109/TKDE.2010.235</pub-id>
                    </mixed-citation>
                </ref>
                <ref id="rep-ref-208772-3">
                    <label>3</label>
                    <mixed-citation publication-type="journal">
                        <person-group person-group-type="author"/>:
                        <article-title>Domain Anomaly Detection in Machine Perception: A System Architecture and Taxonomy.</article-title>
                        <source>
                            <italic>IEEE Trans Pattern Anal Mach Intell</italic>
                        </source>.<year>2014</year>;<volume>36</volume>(<issue>5</issue>) :
                        <elocation-id>10.1109/TPAMI.2013.209</elocation-id>
                        <fpage>845</fpage>-<lpage>59</lpage>
                        <pub-id pub-id-type="pmid">26353221</pub-id>
                        <pub-id pub-id-type="doi">10.1109/TPAMI.2013.209</pub-id>
                    </mixed-citation>
                </ref>
                <ref id="rep-ref-208772-4">
                    <label>4</label>
                    <mixed-citation publication-type="journal">
                        <person-group person-group-type="author"/>:
                        <article-title>An Incongruence-Based Anomaly Detection Strategy for Analyzing Water Pollution in Images from Remote Sensing</article-title>.
                        <source>
                            <italic>Remote Sensing</italic>
                        </source>.<year>2019</year>;<volume>12</volume>(<issue>1</issue>) :
                        <elocation-id>10.3390/rs12010043</elocation-id>
                        <pub-id pub-id-type="doi">10.3390/rs12010043</pub-id>
                    </mixed-citation>
                </ref>
                <ref id="rep-ref-208772-5">
                    <label>5</label>
                    <mixed-citation publication-type="journal">
                        <person-group person-group-type="author"/>:
                        <article-title>A Machine Learning Strategy Based on Kittler&#x2019;s Taxonomy to Detect Anomalies and Recognize Contexts Applied to Monitor Water Bodies in Environments</article-title>.
                        <source>
                            <italic>Remote Sensing</italic>
                        </source>.<year>2022</year>;<volume>14</volume>(<issue>9</issue>) :
                        <elocation-id>10.3390/rs14092222</elocation-id>
                        <pub-id pub-id-type="doi">10.3390/rs14092222</pub-id>
                    </mixed-citation>
                </ref>
            </ref-list>
        </back>
        <sub-article article-type="response" id="comment10959-208772">
            <front-stub>
                <contrib-group>
                    <contrib contrib-type="author">
                        <name>
                            <surname>Buchhorn</surname>
                            <given-names>Katie</given-names>
                        </name>
                        <aff>School of Mathematical Science, Queensland University of Technology, Brisbane, Queensland, Australia</aff>
                    </contrib>
                </contrib-group>
                <author-notes>
                    <fn fn-type="conflict">
                        <p>
                            <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                    </fn>
                </author-notes>
                <pub-date pub-type="epub">
                    <day>26</day>
                    <month>1</month>
                    <year>2024</year>
                </pub-date>
            </front-stub>
            <body>
                <p>The authors thank the reviewer for their thoughtful feedback. Please see the enumerated responses below:</p>
                <p> 1.&#x00a0;Clarification of the anomaly taxonomy has been added in the introduction, including reference 7. Thank you for highlighting these papers. We considered the inclusion of the above references; however, since their definition of &#x201c;drift&#x201d; differs, they were not included to avoid confusion.</p>
                <p> 2.&#x00a0;North arrow and scale have been added to Figure 7 (Figures 1 &amp; 2 are simulated data with arbitrary values for the scale and direction). Algorithm 1 is pseudo-code, whereas on the following page, an excerpt of Python code is provided. Thus, we feel it is necessary to have a distinction in presentation.</p>
                <p> 5.&#x00a0;The &#x201c;Data availability&#x201d; section has been updated with this clarification.</p>
            </body>
        </sub-article>
    </sub-article>
    <sub-article article-type="reviewer-report" id="report199872">
        <front-stub>
            <article-id pub-id-type="doi">10.5256/f1000research.149233.r199872</article-id>
            <title-group>
                <article-title>Reviewer response for version 1</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author">
                    <name>
                        <surname>Deng</surname>
                        <given-names>Ailin</given-names>
                    </name>
                    <xref ref-type="aff" rid="r199872a1">1</xref>
                    <role>Referee</role>
                </contrib>
                <aff id="r199872a1">
                    <label>1</label>National University of Singapore, Singapore, Singapore</aff>
            </contrib-group>
            <author-notes>
                <fn fn-type="conflict">
                    <p>
                        <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>28</day>
                <month>9</month>
                <year>2023</year>
            </pub-date>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2023 Deng A</copyright-statement>
                <copyright-year>2023</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access peer review report distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <related-article ext-link-type="doi" id="relatedArticleReport199872" related-article-type="peer-reviewed-article" xlink:href="10.12688/f1000research.136097.1"/>
            <custom-meta-group>
                <custom-meta>
                    <meta-name>recommendation</meta-name>
                    <meta-value>approve-with-reservations</meta-value>
                </custom-meta>
            </custom-meta-group>
        </front-stub>
        <body>
            <p>The authors provide a detailed analysis of a previous anomaly detection method, GDN, and improve it by proposing a new threshold selection methodology based on the learned local graph structure, denoted as GDN+. The work mainly focuses on river network systems and further provides benchmarking data generation that is specifically designed for the analysis of river networks or similar systems. They show the GNN-based method&#x2019;s effectiveness in river network systems in their study.</p>
            <p> </p>
            <p> Pros: 
                <list list-type="bullet">
                    <list-item>
                        <p>The threshold selection based on the neighborhood sensors is interesting and reasonable. It enables a more flexible threshold for sensors based on the learned graph structure.</p>
                    </list-item>
                    <list-item>
                        <p>The authors provide a detailed code implementation and a Python package for the method.</p>
                    </list-item>
                    <list-item>
                        <p>They also provide methods for generating data based on spatio-temporal structures.</p>
                    </list-item>
                    <list-item>
                        <p>The case study is interesting and effective to show the interpretability in this river system scenario.</p>
                    </list-item>
                </list> </p>
            <p> Cons: 
                <list list-type="bullet">
                    <list-item>
                        <p>It would be interesting to have an ablation study on the effect of the &#x03c4; selection. It seems the default setting is 99 (or should it be 99%?); it would be good to see how different values of &#x03c4; affect the recall or other performance metrics.</p>
                    </list-item>
                </list> </p>
            <p> Minor: 
                <list list-type="bullet">
                    <list-item>
                        <p>In Table 2, some of the methods are in bold, but it seems they are not achieving the best recall performance, such as ARIMA.</p>
                    </list-item>
                    <list-item>
                        <p>In GDN+ threshold part, should the denotation exclude i for A_{ji}? As the A_{ij} includes {i=j} and the description is &#x201c;across the neighborhood of i&#x201d; which might not include sensor i itself.</p>
                    </list-item>
                    <list-item>
                        <p>Some of the claims could be rephrased more gently or targeted at a more specific domain of problem, such as anomaly detection in river network systems. Page 4 of 18: &#x201c;Lack of open-source software for anomaly detection&#x2026;&#x201d;. For anomaly detection, there are some existing open-source packages, such as PyOD (Zhao 
                            <italic>et al.</italic>, 2019
                            <sup>
                                <xref ref-type="bibr" rid="rep-ref-199872-1">1</xref>
                            </sup>).</p>
                    </list-item>
                </list> </p>
            <p> Overall, I think this paper has clear writing and a detailed study of anomaly detection in the specific river system, and provides open-source code for the method and data generation for this kind of problem. An ablation study and minor issues or typos can be further addressed.</p>
            <p>Is the work clearly and accurately presented and does it cite the current literature?</p>
            <p>Partly</p>
            <p>If applicable, is the statistical analysis and its interpretation appropriate?</p>
            <p>Not applicable</p>
            <p>Are all the source data underlying the results available to ensure full reproducibility?</p>
            <p>Yes</p>
            <p>Is the study design appropriate and is the work technically sound?</p>
            <p>Yes</p>
            <p>Are the conclusions drawn adequately supported by the results?</p>
            <p>Partly</p>
            <p>Are sufficient details of methods and analysis provided to allow replication by others?</p>
            <p>Yes</p>
            <p>Reviewer Expertise:</p>
            <p>Anomaly detection</p>
            <p>I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard, however I have significant reservations, as outlined above.</p>
        </body>
        <back>
            <ref-list>
                <title>References</title>
                <ref id="rep-ref-199872-1">
                    <label>1</label>
                    <mixed-citation>
                        <person-group person-group-type="author"/>:
                        <article-title>PyOD: A Python Toolbox for Scalable Outlier Detection</article-title>.
                        <source>
                            <italic>JMLR</italic>
                        </source>.<year>2019</year>;<volume>20</volume>(<issue>96</issue>) :<fpage>1</fpage>-<lpage>7</lpage>
                        <ext-link ext-link-type="uri" xlink:href="https://arxiv.org/abs/1901.01588">Reference source</ext-link>
                    </mixed-citation>
                </ref>
            </ref-list>
        </back>
        <sub-article article-type="response" id="comment10958-199872">
            <front-stub>
                <contrib-group>
                    <contrib contrib-type="author">
                        <name>
                            <surname>Buchhorn</surname>
                            <given-names>Katie</given-names>
                        </name>
                        <aff>School of Mathematical Science, Queensland University of Technology, Brisbane, Queensland, Australia</aff>
                    </contrib>
                </contrib-group>
                <author-notes>
                    <fn fn-type="conflict">
                        <p>
                            <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                    </fn>
                </author-notes>
                <pub-date pub-type="epub">
                    <day>26</day>
                    <month>1</month>
                    <year>2024</year>
                </pub-date>
            </front-stub>
            <body>
                <p>The authors thank the reviewer for their thoughtful feedback. Please see the responses below:</p>
                <p> Cons:</p>
                <p> - Tau is introduced as a percentile (as opposed to a percentage), and ranges from 0 to 100. As per the suggestion, a complete ablation study has been added to the Appendix.</p>
                <p> </p>
                <p> Minor:</p>
                <p> - We utilised bold to highlight points of discussion (particularly poor or particularly good performance). However, to avoid confusion the choice of formatting has been updated so that bold denotes the highest recall value, and underline denotes the second highest.</p>
                <p> - Yes, the GDN+ threshold notation should exclude i for A_{ji}; this has been amended.</p>
                <p> - Wording has been changed to &#x201c;Limited open-source software for anomaly detection&#x2026;&#x201d;.</p>
            </body>
        </sub-article>
    </sub-article>
</article>
