<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.2 20190208//EN" "http://jats.nlm.nih.gov/publishing/1.2/JATS-journalpublishing1.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="methods-article" dtd-version="1.2" xml:lang="en">
    <front>
        <journal-meta>
            <journal-id journal-id-type="pmc">F1000Research</journal-id>
            <journal-title-group>
                <journal-title>F1000Research</journal-title>
            </journal-title-group>
            <issn pub-type="epub">2046-1402</issn>
            <publisher>
                <publisher-name>F1000 Research Limited</publisher-name>
                <publisher-loc>London, UK</publisher-loc>
            </publisher>
        </journal-meta>
        <article-meta>
            <article-id pub-id-type="doi">10.12688/f1000research.122775.1</article-id>
            <article-categories>
                <subj-group subj-group-type="heading">
                    <subject>Method Article</subject>
                </subj-group>
                <subj-group>
                    <subject>Articles</subject>
                </subj-group>
            </article-categories>
            <title-group>
                <article-title>Spatial transcriptomics dimensionality reduction using wavelet bases</article-title>
                <fn-group content-type="pub-status">
                    <fn>
                        <p>[version 1; peer review: 3 approved with reservations, 1 not approved]</p>
                    </fn>
                </fn-group>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Xu</surname>
                        <given-names>Zhuoyan</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Conceptualization</role>
                    <role content-type="http://credit.niso.org/">Data Curation</role>
                    <role content-type="http://credit.niso.org/">Formal Analysis</role>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <role content-type="http://credit.niso.org/">Project Administration</role>
                    <role content-type="http://credit.niso.org/">Software</role>
                    <role content-type="http://credit.niso.org/">Validation</role>
                    <role content-type="http://credit.niso.org/">Visualization</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Original Draft Preparation</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <uri content-type="orcid">https://orcid.org/0000-0001-5776-9388</uri>
                    <xref ref-type="aff" rid="a1">1</xref>
                </contrib>
                <contrib contrib-type="author" corresp="yes">
                    <name>
                        <surname>Sankaran</surname>
                        <given-names>Kris</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Conceptualization</role>
                    <role content-type="http://credit.niso.org/">Data Curation</role>
                    <role content-type="http://credit.niso.org/">Formal Analysis</role>
                    <role content-type="http://credit.niso.org/">Funding Acquisition</role>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <role content-type="http://credit.niso.org/">Project Administration</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Software</role>
                    <role content-type="http://credit.niso.org/">Supervision</role>
                    <role content-type="http://credit.niso.org/">Validation</role>
                    <role content-type="http://credit.niso.org/">Visualization</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Original Draft Preparation</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="corresp" rid="c1">a</xref>
                    <xref ref-type="aff" rid="a1">1</xref>
                </contrib>
                <aff id="a1">
                    <label>1</label>Department of Statistics, University of Wisconsin - Madison, Madison, Wisconsin, 53706, USA</aff>
            </contrib-group>
            <author-notes>
                <corresp id="c1">
                    <label>a</label>
                    <email xlink:href="mailto:ksankaran@wisc.edu">ksankaran@wisc.edu</email>
                </corresp>
                <fn fn-type="conflict">
                    <p>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>12</day>
                <month>9</month>
                <year>2022</year>
            </pub-date>
            <pub-date pub-type="collection">
                <year>2022</year>
            </pub-date>
            <volume>11</volume>
            <elocation-id>1033</elocation-id>
            <history>
                <date date-type="accepted">
                    <day>2</day>
                    <month>9</month>
                    <year>2022</year>
                </date>
            </history>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2022 Xu Z and Sankaran K</copyright-statement>
                <copyright-year>2022</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access article distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <self-uri content-type="pdf" xlink:href="https://f1000research.com/articles/11-1033/pdf"/>
            <abstract>
                <p>Background: Spatially resolved transcriptomics (ST) measures gene expression along with the spatial coordinates of the measurements. The analysis of ST data involves significant computational complexity. In this work, we propose a gene expression dimensionality reduction algorithm that retains spatial structure.</p>
                <p> Methods: We combine the wavelet transformation with matrix factorization to select spatially-varying genes. We extract a low-dimensional representation of these genes. We adopt an Empirical Bayes perspective, imposing regularization through the prior distribution of factor genes. Additionally, we visualize the extracted representations, providing an overview of global spatial patterns. We illustrate the performance of our methods through spatial structure recovery and gene expression reconstruction using a simulation and real data analysis.</p>
                <p> Results: In real data experiments, our method identifies spatial structure of gene factors and outperforms regular decomposition regarding reconstruction error. We find a connection between the fluctuation of gene patterns and wavelet estimates, and this allows us to provide smoother visualizations. We develop the package and share the workflow generating reproducible quantitative results and gene visualization. The package is available at 
                    <ext-link ext-link-type="uri" xlink:href="https://github.com/OliverXUZY/waveST">https://github.com/OliverXUZY/waveST</ext-link>.</p>
                <p> Conclusions: We have proposed a pipeline for dimensionality reduction that respects spatial structure. Both simulations and real data experiments demonstrate that wavelet and shrinkage techniques show positive results in spatially resolved transcriptomics data. We highlight the idea of combining image processing techniques and statistical methods for application in a spatial genomics context.</p>
            </abstract>
            <kwd-group kwd-group-type="author">
                <kwd>Spatial Transcriptomics</kwd>
                <kwd>Wavelet Transformation</kwd>
                <kwd>Empirical Bayes Matrix Factorization</kwd>
                <kwd>Factor Gene</kwd>
            </kwd-group>
            <funding-group>
                <award-group id="fund-1" xlink:href="http://dx.doi.org/10.13039/100006132">
                    <funding-source>Office of Science</funding-source>
                </award-group>
                <award-group id="fund-2" xlink:href="http://dx.doi.org/10.13039/100000001">
                    <funding-source>National Science Foundation</funding-source>
                </award-group>
                <funding-statement>This research was performed using the compute resources and assistance of the UW-Madison Center For High Throughput Computing (CHTC) in the Department of Computer Sciences. The CHTC is supported by UW-Madison, the Advanced Computing Initiative, the Wisconsin Alumni Research Foundation, the Wisconsin Institutes for Discovery, and the National Science Foundation, and is an active member of the OSG Consortium, which is supported by the National Science Foundation and the U.S. Department of Energy&#x2019;s Office of Science.</funding-statement>
                <funding-statement>
                    <italic>The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript.</italic>
                </funding-statement>
            </funding-group>
        </article-meta>
    </front>
    <body>
        <sec id="sec1" sec-type="intro">
            <title>Introduction</title>
            <p>Spatially resolved transcriptomics (ST) is a technology measuring spatial variation in gene expression. Different technological platforms support different genome coverage and spatial resolution (tissue-level measurement to subcellular measurement). Examples of ST platforms include Spatial Transcriptomics (
                <xref ref-type="bibr" rid="ref1">St&#x00e5;hl et al., 2016</xref>; 
                <xref ref-type="bibr" rid="ref2">Xia et al., 2019</xref>), 10x Genomics Visium, Slide-seq (
                <xref ref-type="bibr" rid="ref3">Rodriques et al., 2019</xref>), sci-Space (
                <xref ref-type="bibr" rid="ref4">Srivatsan et al., 2021</xref>), and MERFISH (
                <xref ref-type="bibr" rid="ref5">Chen et al., 2015</xref>). Spatial transcriptomics allows visualization and quantitative analysis of the transcriptome with spatial resolution in individual tissue sections. A series of studies combining gene expression and spatial information has generated new insights in biological analysis (
                <xref ref-type="bibr" rid="ref6">Kuppe et al., 2020</xref>; 
                <xref ref-type="bibr" rid="ref7">Shah et al., 2016</xref>; 
                <xref ref-type="bibr" rid="ref8">Berglund et al., 2018</xref>). Quantification of gene expression has wide applications in transcriptomics. Understanding the spatial distribution of gene expression has helped to answer fundamental questions in developmental biology (
                <xref ref-type="bibr" rid="ref9">Asp et al., 2019</xref>; 
                <xref ref-type="bibr" rid="ref10">R&#x00f6;delsperger et al., 2021</xref>), cancer (
                <xref ref-type="bibr" rid="ref11">Thrane et al., 2018</xref>; 
                <xref ref-type="bibr" rid="ref12">Moncada et al., 2020</xref>), and neuroscience (
                <xref ref-type="bibr" rid="ref13">Moffitt et al., 2016</xref>; 
                <xref ref-type="bibr" rid="ref14">Close et al., 2021</xref>). Two widely-used methods for gene expression quantification are fluorescent in situ hybridization (FISH) and next-generation sequencing.</p>
            <p>Although these approaches have been developed to measure gene expression while preserving spatial information, there are statistical challenges in analyses that combine gene and spatial information. Specifically, gene dimensionality reduction guided by spatial information is still an active area of study (
                <xref ref-type="bibr" rid="ref15">Abu-Jamous and Kelly, 2018</xref>; 
                <xref ref-type="bibr" rid="ref16 ref17">Kiselev et al., 2017, 2018</xref>; 
                <xref ref-type="bibr" rid="ref18">Zhu and Sabatti, 2020</xref>; 
                <xref ref-type="bibr" rid="ref19">Shang and Zhou, 2022</xref>; 
                <xref ref-type="bibr" rid="ref20">Velten et al., 2022</xref>; 
                <xref ref-type="bibr" rid="ref21">Townes and Engelhardt, 2021</xref>). In this work, we conduct a dimensionality reduction on spatially varying gene data. After transformation, gene expression over locations is a square matrix, which allows us to view it as an image. Therefore, tools from image processing can be adapted &#x2013; our method incorporates spatial information through the wavelet transformation, a multi-scale analysis that decomposes a sequence into an orthonormal series. We apply the wavelet transformation to each gene&#x2019;s expression over locations. This is a common technique in image denoising. We thus apply techniques analogous to image analysis. We evaluate performance using reconstruction error and by comparing the resulting visualizations. Our analysis pipeline builds on off-the-shelf tools, and code is available for reproducing our results. We also attach visualizations in our pipeline, giving example interpretations of intermediate results.</p>
            <p>A natural idea in spatial transcriptomic analysis is to identify gene types across spatial locations (
                <xref ref-type="bibr" rid="ref15">Abu-Jamous and Kelly, 2018</xref>). For example, in clustering, the representations in each cluster achieve dimensionality reduction under the assumption that genes within one cluster have the same type. A parallel technique is clustering based on cell type instead. General clustering methods can be combined into more sophisticated pipelines tailored toward spatial single-cell analysis. SC3 (single-cell consensus clustering 3) (
                <xref ref-type="bibr" rid="ref16">Kiselev et al., 2017</xref>) is an ensemble clustering method. It calculates distance matrices across cell locations using the Euclidean distance, then applies spectral clustering, and assigns membership (
                <xref ref-type="bibr" rid="ref17">Kiselev et al., 2018</xref>). To identify the cell type in each cluster, one can perform differential expression analysis between all pairs of clusters. scGeneFit uses a label-aware compression method to find marker genes (
                <xref ref-type="bibr" rid="ref22">Dumitrascu et al., 2021</xref>). Given the cell-by-gene expression matrix and cell clustering membership, scGeneFit maps cells to lower-dimensional space where cells within the same cluster are closer. For gene dimensionality reduction, (
                <xref ref-type="bibr" rid="ref18">Zhu and Sabatti, 2020</xref>) constructs a neighborhood graph from the spatial coordinates, then applies a graph-based feature selection procedure to determine spatially varying genes. They also provide the option to infer a latent graph embedding for cells based on selected genes, applying spline models to fit the gene&#x2019;s expression on the latent embedding. Then they leverage the fitted coefficients to reduce the dimensionality of each gene. (
                <xref ref-type="bibr" rid="ref23">Svensson et al., 2018</xref>) proposed pipelines using mixed-effect models incorporating spatial information. The model contains two random effect terms: a spatial variance term that parametrizes gene expression covariance by pairwise distance between samples and a noise term that models nonspatial variability. The model leverages efficient inference methods previously developed for linear mixed models, and it is computationally efficient.</p>
            <p>Our setting is closely aligned with the following recent works: (
                <xref ref-type="bibr" rid="ref19">Shang and Zhou, 2022</xref>) developed SpatialPCA, applying probabilistic principal component analysis (PCA) on ST data for dimensionality reduction. They assume data are given as a location-by-genes matrix and construct a regression model similar to factor analysis, where the prior covariance matrix of factor genes is a distance matrix constructed with a Gaussian kernel. (
                <xref ref-type="bibr" rid="ref20">Velten et al., 2022</xref>) proposed MEFISTO, combining factor analysis with the non-parametric framework of Gaussian processes to model spatio-temporal dependencies in the latent space. (
                <xref ref-type="bibr" rid="ref21">Townes and Engelhardt, 2021</xref>) developed nonnegative spatial factorization (NSF), combining a Gaussian process prior over spatial locations and a Poisson or negative binomial likelihood for count data, identifying generalizable spatial patterns of gene expression. All these works impose spatial structure on the prior of the factor genes. While these methods offer new dimensionality reduction techniques to cluster the genes, the complex model structure and a large number of hyperparameters introduce uncertainty and noise. Instead of imposing structural assumptions on the prior of the factor genes, we impose structure on the factor gene itself. Our contributions are the following:
                <list list-type="bullet">
                    <list-item>
                        <label>&#x2022;</label>
                        <p>We propose an approach, based on techniques from matrix decomposition and image signal processing, to perform gene dimensionality reduction that retains inferred spatial structure.</p>
                    </list-item>
                    <list-item>
                        <label>&#x2022;</label>
                        <p>We run simulations showing that wavelet-guided dimensionality reduction yields better estimates than the singular value decomposition (SVD) in the low signal-to-noise ratio (SNR) regime.</p>
                    </list-item>
                    <list-item>
                        <label>&#x2022;</label>
                        <p>We perform real data experiments to identify the connection between wavelet techniques and fluctuation of the gene expression, which would be useful in selecting spatially related genes based on reconstruction error.</p>
                    </list-item>
                    <list-item>
                        <label>&#x2022;</label>
                        <p>We provide a gene extraction pipeline capturing the global information of spatially related genes. We provide smoother visualization of gene factors via wavelet methods. We develop an R package 
                            <monospace>waveST</monospace> and share the workflow generating reproducible quantitative results and gene visualization.</p>
                    </list-item>
                </list>
            </p>
            <p>The diagram for workflow can be seen in 
                <xref ref-type="fig" rid="f2">Figure 1</xref>. The paper is organized as follows: In Section Background, we introduce the background on required techniques, including the wavelet transformation and matrix decompositions. In Section Problem Setup, we formally define our problem under this setting, and Section Methods introduces our algorithms and analysis pipeline. In Section Simulation, we implement simulations showing the effect of wavelet transformation in reducing error. In Section Real Data Experiment, we apply our method to data from 
                <xref ref-type="bibr" rid="ref24">Weber (2021)</xref>, showing the reconstruction error in dimension reduction and visualization of lower-dimensional representations.</p>
            <fig fig-type="figure" id="f2" orientation="portrait" position="float">
                <label>Figure 1. </label>
                <caption>
                    <title>A summary of the proposed workflow.</title>
                    <p>
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:mover accent="true">
                                    <mml:mi>C</mml:mi>
                                    <mml:mo stretchy="true">&#x0302;</mml:mo>
                                </mml:mover>
                            </mml:math>
                        </inline-formula> and 
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:mover accent="true">
                                    <mml:mi>Z</mml:mi>
                                    <mml:mo stretchy="true">&#x0302;</mml:mo>
                                </mml:mover>
                            </mml:math>
                        </inline-formula> are the estimators for 
                        <italic toggle="yes">C</italic> and 
                        <italic toggle="yes">Z.</italic> 
                        <italic toggle="yes">F</italic> and 
                        <italic toggle="yes">L</italic> are the factor matrix and loading matrix constructed as 
                        <italic toggle="yes">Z</italic> = 
                        <italic toggle="yes">FL</italic>, 
                        <italic toggle="yes">F
                            <sub>c</sub>
                        </italic> and 
                        <italic toggle="yes">L
                            <sub>c</sub>
                        </italic> are the factor matrix and loading matrix constructed as 
                        <italic toggle="yes">C</italic> = 
                        <italic toggle="yes">F
                            <sub>c</sub>L
                            <sub>c</sub>.</italic> 
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:mover accent="true">
                                    <mml:mi>F</mml:mi>
                                    <mml:mo stretchy="true">&#x0302;</mml:mo>
                                </mml:mover>
                            </mml:math>
                        </inline-formula> and 
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:mover accent="true">
                                    <mml:msub>
                                        <mml:mi>F</mml:mi>
                                        <mml:mi>c</mml:mi>
                                    </mml:msub>
                                    <mml:mo stretchy="true">&#x0302;</mml:mo>
                                </mml:mover>
                            </mml:math>
                        </inline-formula> are the estimators for 
                        <italic toggle="yes">F</italic> and 
                        <italic toggle="yes">F
                            <sub>c</sub>
                        </italic>, similarly for 
                        <italic toggle="yes">Z</italic> and 
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:mover accent="true">
                                    <mml:mi>Z</mml:mi>
                                    <mml:mo stretchy="true">&#x0302;</mml:mo>
                                </mml:mover>
                            </mml:math>
                        </inline-formula>. We will specify the details in Section Background and Problem Setup.</p>
                </caption>
                <graphic id="gr1" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/134806/86da6507-272d-4572-b5ee-a93f96a263ce_figure1.gif"/>
            </fig>
        </sec>
        <sec id="sec2">
            <title>Background</title>
            <p>In this section, we cover the background of the methods we use in data pre-processing and analysis.</p>
            <sec id="sec3">
                <title>Wavelet transformation</title>
                <p>Density estimation and function approximation are fundamental problems in statistics and machine learning. Non-parametric methods such as spline regression (
                    <xref ref-type="bibr" rid="ref25">Perperoglou et al., 2019</xref>), Fourier transformation (
                    <xref ref-type="bibr" rid="ref26">Cochran et al., 1967</xref>) and Wavelet transformation (
                    <xref ref-type="bibr" rid="ref27">Nason, 2008</xref>) have been used in such scenarios.</p>
                <p>Consider a model with one predictor: 
                    <italic toggle="yes">y</italic> = 
                    <italic toggle="yes">f</italic>(
                    <italic toggle="yes">x</italic>) + 
                    <italic toggle="yes">&#x03f5;</italic>, where 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msup>
                                <mml:mrow>
                                    <mml:mi mathvariant="double-struck">E</mml:mi>
                                    <mml:mi mathvariant="normal">&#x03f5;</mml:mi>
                                </mml:mrow>
                                <mml:mn>2</mml:mn>
                            </mml:msup>
                            <mml:mo>=</mml:mo>
                            <mml:msup>
                                <mml:mi>&#x03c3;</mml:mi>
                                <mml:mn>2</mml:mn>
                            </mml:msup>
                        </mml:math>
                    </inline-formula>. We want to estimate 
                    <italic toggle="yes">f</italic>, the 
                    <italic toggle="yes">trend</italic> of the function. We assume the predictors are ordered 
                    <italic toggle="yes">x</italic>
                    <sub>1</sub> &lt; 
                    <italic toggle="yes">x</italic>
                    <sub>2</sub> &lt; &#x2026; &lt; 
                    <italic toggle="yes">x
                        <sub>n</sub>.</italic> We have a signal or frequency to estimate.</p>
                <p>Consider the 
                    <italic toggle="yes">Haar mother</italic> wavelet:
                    <disp-formula id="e1">
                        <mml:math display="block">
                            <mml:mi>&#x03c8;</mml:mi>
                            <mml:mfenced close=")" open="(">
                                <mml:mi>x</mml:mi>
                            </mml:mfenced>
                            <mml:mo>=</mml:mo>
                            <mml:mfenced close="" open="{">
                                <mml:mtable columnalign="center">
                                    <mml:mtr>
                                        <mml:mtd>
                                            <mml:mn>1</mml:mn>
                                        </mml:mtd>
                                        <mml:mtd>
                                            <mml:mi>x</mml:mi>
                                            <mml:mo>&#x2208;</mml:mo>
                                            <mml:mfenced close=")" open="[" separators=",">
                                                <mml:mn>0</mml:mn>
                                                <mml:mfrac>
                                                    <mml:mn>1</mml:mn>
                                                    <mml:mn>2</mml:mn>
                                                </mml:mfrac>
                                            </mml:mfenced>
                                        </mml:mtd>
                                    </mml:mtr>
                                    <mml:mtr>
                                        <mml:mtd>
                                            <mml:mo>&#x2212;</mml:mo>
                                            <mml:mn>1</mml:mn>
                                        </mml:mtd>
                                        <mml:mtd>
                                            <mml:mi>x</mml:mi>
                                            <mml:mo>&#x2208;</mml:mo>
                                            <mml:mfenced close=")" open="[" separators=",">
                                                <mml:mfrac>
                                                    <mml:mn>1</mml:mn>
                                                    <mml:mn>2</mml:mn>
                                                </mml:mfrac>
                                                <mml:mn>1</mml:mn>
                                            </mml:mfenced>
                                        </mml:mtd>
                                    </mml:mtr>
                                    <mml:mtr>
                                        <mml:mtd>
                                            <mml:mn>0</mml:mn>
                                        </mml:mtd>
                                        <mml:mtd>
                                            <mml:mtext>otherwise</mml:mtext>
                                        </mml:mtd>
                                    </mml:mtr>
                                </mml:mtable>
                            </mml:mfenced>
                        </mml:math>
                    </disp-formula>satisfying 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msubsup>
                                <mml:mo>&#x222b;</mml:mo>
                                <mml:mrow>
                                    <mml:mo>&#x2212;</mml:mo>
                                    <mml:mo>&#x221e;</mml:mo>
                                </mml:mrow>
                                <mml:mo>&#x221e;</mml:mo>
                            </mml:msubsup>
                            <mml:mi>&#x03c8;</mml:mi>
                            <mml:mfenced close=")" open="(">
                                <mml:mi>x</mml:mi>
                            </mml:mfenced>
                            <mml:mi mathvariant="italic">dx</mml:mi>
                            <mml:mo>=</mml:mo>
                            <mml:mn>0</mml:mn>
                        </mml:math>
                    </inline-formula>. Unlike the Fourier basis, wavelets oscillate and decay fast, only contributing to a certain local area and zero elsewhere. We can generate wavelets from the Haar mother wavelet 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mi>&#x03c8;</mml:mi>
                                <mml:mrow>
                                    <mml:mi>j</mml:mi>
                                    <mml:mo>,</mml:mo>
                                    <mml:mi>k</mml:mi>
                                </mml:mrow>
                            </mml:msub>
                            <mml:mfenced close=")" open="(">
                                <mml:mi>x</mml:mi>
                            </mml:mfenced>
                            <mml:mo>=</mml:mo>
                            <mml:msup>
                                <mml:mn>2</mml:mn>
                                <mml:mrow>
                                    <mml:mi>j</mml:mi>
                                    <mml:mo>/</mml:mo>
                                    <mml:mn>2</mml:mn>
                                </mml:mrow>
                            </mml:msup>
                            <mml:mi>&#x03c8;</mml:mi>
                            <mml:mfenced close=")" open="(">
                                <mml:mrow>
                                    <mml:msup>
                                        <mml:mn>2</mml:mn>
                                        <mml:mi>j</mml:mi>
                                    </mml:msup>
                                    <mml:mi>x</mml:mi>
                                    <mml:mo>&#x2212;</mml:mo>
                                    <mml:mi>k</mml:mi>
                                </mml:mrow>
                            </mml:mfenced>
                        </mml:math>
                    </inline-formula> for integer 
                    <italic toggle="yes">j</italic>, 
                    <italic toggle="yes">k.</italic> These wavelets form orthonormal sets. We can decompose the trend as 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>f</mml:mi>
                            <mml:mfenced close=")" open="(">
                                <mml:mi>x</mml:mi>
                            </mml:mfenced>
                            <mml:mo>=</mml:mo>
                            <mml:msubsup>
                                <mml:mo>&#x2211;</mml:mo>
                                <mml:mrow>
                                    <mml:mi>j</mml:mi>
                                    <mml:mo>=</mml:mo>
                                    <mml:mo>&#x2212;</mml:mo>
                                    <mml:mo>&#x221e;</mml:mo>
                                </mml:mrow>
                                <mml:mo>&#x221e;</mml:mo>
                            </mml:msubsup>
                            <mml:msubsup>
                                <mml:mo>&#x2211;</mml:mo>
                                <mml:mrow>
                                    <mml:mi>k</mml:mi>
                                    <mml:mo>=</mml:mo>
                                    <mml:mo>&#x2212;</mml:mo>
                                    <mml:mo>&#x221e;</mml:mo>
                                </mml:mrow>
                                <mml:mo>&#x221e;</mml:mo>
                            </mml:msubsup>
                            <mml:msub>
                                <mml:mi>d</mml:mi>
                                <mml:mrow>
                                    <mml:mi>j</mml:mi>
                                    <mml:mo>,</mml:mo>
                                    <mml:mi>k</mml:mi>
                                </mml:mrow>
                            </mml:msub>
                            <mml:msub>
                                <mml:mi>&#x03c8;</mml:mi>
                                <mml:mrow>
                                    <mml:mi>j</mml:mi>
                                    <mml:mo>,</mml:mo>
                                    <mml:mi>k</mml:mi>
                                </mml:mrow>
                            </mml:msub>
                            <mml:mfenced close=")" open="(">
                                <mml:mi>x</mml:mi>
                            </mml:mfenced>
                        </mml:math>
                    </inline-formula>, where 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mi>d</mml:mi>
                                <mml:mrow>
                                    <mml:mi>j</mml:mi>
                                    <mml:mo>,</mml:mo>
                                    <mml:mi>k</mml:mi>
                                </mml:mrow>
                            </mml:msub>
                            <mml:mo>=</mml:mo>
                            <mml:msubsup>
                                <mml:mo>&#x222b;</mml:mo>
                                <mml:mrow>
                                    <mml:mo>&#x2212;</mml:mo>
                                    <mml:mo>&#x221e;</mml:mo>
                                </mml:mrow>
                                <mml:mo>&#x221e;</mml:mo>
                            </mml:msubsup>
                            <mml:mi>f</mml:mi>
                            <mml:mfenced close=")" open="(">
                                <mml:mi>x</mml:mi>
                            </mml:mfenced>
                            <mml:msub>
                                <mml:mi>&#x03c8;</mml:mi>
                                <mml:mrow>
                                    <mml:mi>j</mml:mi>
                                    <mml:mo>,</mml:mo>
                                    <mml:mi>k</mml:mi>
                                </mml:mrow>
                            </mml:msub>
                            <mml:mfenced close=")" open="(">
                                <mml:mi>x</mml:mi>
                            </mml:mfenced>
                            <mml:mi mathvariant="italic">dx</mml:mi>
                            <mml:mo>=</mml:mo>
                            <mml:mo>&#x27e8;</mml:mo>
                            <mml:mi>f</mml:mi>
                            <mml:mo>,</mml:mo>
                            <mml:msub>
                                <mml:mi>&#x03c8;</mml:mi>
                                <mml:mrow>
                                    <mml:mi>j</mml:mi>
                                    <mml:mo>,</mml:mo>
                                    <mml:mi>k</mml:mi>
                                </mml:mrow>
                            </mml:msub>
                            <mml:mo>&#x27e9;</mml:mo>
                        </mml:math>
                    </inline-formula> are the wavelet coefficients.</p>
                <p>Wavelet-based methods have several advantages over other non-parametric methods, especially when dealing with sparse data. One property of wavelets is localization. If a sequence has a discontinuity, this will only influence the wavelet basis elements around it. In contrast, for a Fourier basis consisting of sine and cosine functions at different frequencies, every basis element will interact with this discontinuity, hence influencing every Fourier coefficient.</p>
                <p>The simplest discrete wavelet transformation calculates the differences and sums of each adjacent pair. Suppose we have a vector of length 
                    <italic toggle="yes">n</italic>, where 
                    <italic toggle="yes">n</italic> is dyadic (
                    <italic toggle="yes">n</italic> = 2
                    <italic toggle="yes">
                        <sup>J</sup>
                    </italic>). We compute 
                    <italic toggle="yes">d</italic>
                    <sub>
                        <italic toggle="yes">J</italic>&#x2212;1, 
                        <italic toggle="yes">k</italic>
                    </sub> = 
                    <italic toggle="yes">y</italic>
                    <sub>2
                        <italic toggle="yes">k</italic>
                    </sub> &#x2212; 
                    <italic toggle="yes">y</italic>
                    <sub>2
                        <italic toggle="yes">k</italic>&#x2212;1</sub> as finest-level detail and 
                    <italic toggle="yes">c</italic>
                    <sub>
                        <italic toggle="yes">J</italic>&#x2212;1, 
                        <italic toggle="yes">k</italic>
                    </sub> = 
                    <italic toggle="yes">y</italic>
                    <sub>2
                        <italic toggle="yes">k</italic>
                    </sub> + 
                    <italic toggle="yes">y</italic>
                    <sub>2
                        <italic toggle="yes">k</italic>&#x2212;1</sub> as the finest-level averages. We have 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mfenced close="}" open="{">
                                <mml:msub>
                                    <mml:mi>d</mml:mi>
                                    <mml:mi>k</mml:mi>
                                </mml:msub>
                            </mml:mfenced>
                        </mml:math>
                    </inline-formula> containing 
                    <italic toggle="yes">n</italic>/2 = 2
                    <sup>
                        <italic toggle="yes">J</italic>&#x2212;1</sup> as our finest level coefficients. To obtain the next coarsest coeffients we set:
                    <disp-formula id="e2">
                        <mml:math display="block">
                            <mml:msub>
                                <mml:mi>d</mml:mi>
                                <mml:mrow>
                                    <mml:mi>J</mml:mi>
                                    <mml:mo>&#x2212;</mml:mo>
                                    <mml:mn>2</mml:mn>
                                    <mml:mo>,</mml:mo>
                                    <mml:mi mathvariant="normal">&#x2113;</mml:mi>
                                </mml:mrow>
                            </mml:msub>
                            <mml:mo>=</mml:mo>
                            <mml:msub>
                                <mml:mi>c</mml:mi>
                                <mml:mrow>
                                    <mml:mi>J</mml:mi>
                                    <mml:mo>&#x2212;</mml:mo>
                                    <mml:mn>1</mml:mn>
                                    <mml:mo>,</mml:mo>
                                    <mml:mn>2</mml:mn>
                                    <mml:mi mathvariant="normal">&#x2113;</mml:mi>
                                </mml:mrow>
                            </mml:msub>
                            <mml:mo>&#x2212;</mml:mo>
                            <mml:msub>
                                <mml:mi>c</mml:mi>
                                <mml:mrow>
                                    <mml:mi>J</mml:mi>
                                    <mml:mo>&#x2212;</mml:mo>
                                    <mml:mn>1</mml:mn>
                                    <mml:mo>,</mml:mo>
                                    <mml:mn>2</mml:mn>
                                    <mml:mi mathvariant="normal">&#x2113;</mml:mi>
                                    <mml:mo>&#x2212;</mml:mo>
                                    <mml:mn>1</mml:mn>
                                </mml:mrow>
                            </mml:msub>
                        </mml:math>
                        <label>(1)</label>
                    </disp-formula>
                    <disp-formula id="e3">
                        <mml:math display="block">
                            <mml:mtable columnalign="left" displaystyle="true">
                                <mml:mtr>
                                    <mml:mtd>
                                        <mml:msub>
                                            <mml:mi>c</mml:mi>
                                            <mml:mrow>
                                                <mml:mi>J</mml:mi>
                                                <mml:mo>&#x2212;</mml:mo>
                                                <mml:mn>2</mml:mn>
                                                <mml:mo>,</mml:mo>
                                                <mml:mi mathvariant="normal">&#x2113;</mml:mi>
                                            </mml:mrow>
                                        </mml:msub>
                                    </mml:mtd>
                                </mml:mtr>
                            </mml:mtable>
                            <mml:mo>=</mml:mo>
                            <mml:msub>
                                <mml:mi>c</mml:mi>
                                <mml:mrow>
                                    <mml:mi>J</mml:mi>
                                    <mml:mo>&#x2212;</mml:mo>
                                    <mml:mn>1</mml:mn>
                                    <mml:mo>,</mml:mo>
                                    <mml:mn>2</mml:mn>
                                    <mml:mi mathvariant="normal">&#x2113;</mml:mi>
                                </mml:mrow>
                            </mml:msub>
                            <mml:mo>+</mml:mo>
                            <mml:msub>
                                <mml:mi>c</mml:mi>
                                <mml:mrow>
                                    <mml:mi>J</mml:mi>
                                    <mml:mo>&#x2212;</mml:mo>
                                    <mml:mn>1</mml:mn>
                                    <mml:mo>,</mml:mo>
                                    <mml:mn>2</mml:mn>
                                    <mml:mi mathvariant="normal">&#x2113;</mml:mi>
                                    <mml:mo>&#x2212;</mml:mo>
                                    <mml:mn>1</mml:mn>
                                </mml:mrow>
                            </mml:msub>
                        </mml:math>
                        <label>(2)</label>
                    </disp-formula>
                </p>
                <p>We continue this procedure until 
                    <italic toggle="yes">j</italic> reaches one. The set of details 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mfenced close="}" open="{">
                                <mml:msub>
                                    <mml:mi>d</mml:mi>
                                    <mml:mi>k</mml:mi>
                                </mml:msub>
                            </mml:mfenced>
                        </mml:math>
                    </inline-formula> across levels are our wavelet coefficients.</p>
                <p>We use 
                    <italic toggle="yes">J</italic> to denote the scale of wavelets; with larger 
                    <italic toggle="yes">J</italic> we have a finer scale and approximation. However, a finer scale sometimes introduces more parameters to capture minor details of the sequence (overfitting). The trade-off in choosing 
                    <italic toggle="yes">J</italic> is crucial in the wavelet transformation.</p>
                <p>Regularization and smoothing can be used to prevent overfitting. Regularization is usually conducted by shrinking wavelet coefficients. The concept of wavelet shrinkage was first proposed by 
                    <xref ref-type="bibr" rid="ref28">Donoho and Johnstone (1994)</xref>. The motivation behind wavelet shrinkage is straightforward. Among the empirical wavelet coefficients, the large coefficients usually contain true signal and noise, whereas the small coefficients only contain noise. Shrinkage is often used when the wavelet coefficients are assumed to be a 
                    <italic toggle="yes">sparse vector.</italic>
                </p>
                <p>Wavelet shrinkage is often conducted by setting a 
                    <italic toggle="yes">threshold</italic> (denoted by 
                    <italic toggle="yes">&#x03b4;</italic>) and only keeping the coefficients above the threshold. To choose a threshold, a natural metric is the squared error between estimated function and the truth: 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mover accent="true">
                                <mml:mi>M</mml:mi>
                                <mml:mo stretchy="true">&#x0302;</mml:mo>
                            </mml:mover>
                            <mml:mo>=</mml:mo>
                            <mml:msubsup>
                                <mml:mo>&#x2211;</mml:mo>
                                <mml:mrow>
                                    <mml:mi>i</mml:mi>
                                    <mml:mo>=</mml:mo>
                                    <mml:mn>1</mml:mn>
                                </mml:mrow>
                                <mml:mi>n</mml:mi>
                            </mml:msubsup>
                            <mml:msup>
                                <mml:mfenced close=")" open="(">
                                    <mml:mrow>
                                        <mml:mi>f</mml:mi>
                                        <mml:mfenced close=")" open="(">
                                            <mml:msub>
                                                <mml:mi>x</mml:mi>
                                                <mml:mi>i</mml:mi>
                                            </mml:msub>
                                        </mml:mfenced>
                                        <mml:mo>&#x2212;</mml:mo>
                                        <mml:mover accent="true">
                                            <mml:mi>f</mml:mi>
                                            <mml:mo stretchy="true">&#x0302;</mml:mo>
                                        </mml:mover>
                                        <mml:mfenced close=")" open="(">
                                            <mml:msub>
                                                <mml:mi>x</mml:mi>
                                                <mml:mi>i</mml:mi>
                                            </mml:msub>
                                        </mml:mfenced>
                                    </mml:mrow>
                                </mml:mfenced>
                                <mml:mn>2</mml:mn>
                            </mml:msup>
                        </mml:math>
                    </inline-formula> and 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>M</mml:mi>
                            <mml:mo>=</mml:mo>
                            <mml:mi mathvariant="double-struck">E</mml:mi>
                            <mml:mover accent="true">
                                <mml:mi>M</mml:mi>
                                <mml:mo stretchy="true">&#x0302;</mml:mo>
                            </mml:mover>
                        </mml:math>
                    </inline-formula>. 
                    <xref ref-type="bibr" rid="ref29">Donoho et al. (1994)</xref> proposed a 
                    <italic toggle="yes">universal threshold</italic> 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>&#x03b4;</mml:mi>
                            <mml:mo>=</mml:mo>
                            <mml:mi>&#x03c3;</mml:mi>
                            <mml:msqrt>
                                <mml:mrow>
                                    <mml:mn>2</mml:mn>
                                    <mml:mo>log</mml:mo>
                                    <mml:mi>n</mml:mi>
                                </mml:mrow>
                            </mml:msqrt>
                        </mml:math>
                    </inline-formula>, which induced 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>M</mml:mi>
                            <mml:mo>&#x2264;</mml:mo>
                            <mml:mi mathvariant="script">O</mml:mi>
                            <mml:mfenced close=")" open="(">
                                <mml:mrow>
                                    <mml:mo>log</mml:mo>
                                    <mml:mi>n</mml:mi>
                                    <mml:msup>
                                        <mml:mi>&#x03c3;</mml:mi>
                                        <mml:mn>2</mml:mn>
                                    </mml:msup>
                                </mml:mrow>
                            </mml:mfenced>
                        </mml:math>
                    </inline-formula>. 
                    <xref ref-type="bibr" rid="ref30">Donoho and Johnstone (1995)</xref> also proposed 
                    <italic toggle="yes">Stein&#x2019;s unbiased risk estimation (SURE) threshold</italic> based on Stein&#x2019;s (1981) unbiased risk estimator. The optimal SURE threshold can be obtained in 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi mathvariant="script">O</mml:mi>
                            <mml:mfenced close=")" open="(">
                                <mml:mrow>
                                    <mml:mi>n</mml:mi>
                                    <mml:mo>log</mml:mo>
                                    <mml:mi>n</mml:mi>
                                </mml:mrow>
                            </mml:mfenced>
                        </mml:math>
                    </inline-formula> operations. 
                    <xref ref-type="bibr" rid="ref30">Donoho and Johnstone (1995)</xref> also noted that SURE sometimes fails when the true signal coefficients are highly sparse; they therefore proposed a hybrid scheme called 
                    <italic toggle="yes">SureShrink</italic>, combining the SURE and universal thresholds, using them depending on certain situations.</p>
                <p>The extension of wavelet methods to 2D regularly spaced data (images) and such data in higher dimensions was proposed by 
                    <xref ref-type="bibr" rid="ref31">Mallat (1989)</xref>. We only consider 2-D wavelet transformation since we decompose 2-D spatial gene expression data. Suppose we have 
                    <italic toggle="yes">n</italic> &#x00d7; 
                    <italic toggle="yes">n</italic> matrix 
                    <italic toggle="yes">A</italic> where 
                    <italic toggle="yes">n</italic> = 2
                    <italic toggle="yes">
                        <sup>J</sup>
                    </italic> is dyadic. A simple discrete wavelet transformation on 
                    <italic toggle="yes">A</italic> first applies procedures (1) and (2) to the rows of the matrix. We then have two matrices of size 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>n</mml:mi>
                            <mml:mo>&#x00d7;</mml:mo>
                            <mml:mfrac>
                                <mml:mi>n</mml:mi>
                                <mml:mn>2</mml:mn>
                            </mml:mfrac>
                        </mml:math>
                    </inline-formula>, called 
                    <italic toggle="yes">H</italic> and 
                    <italic toggle="yes">G.</italic> Then we apply the same procedures to both the columns of 
                    <italic toggle="yes">H</italic> and 
                    <italic toggle="yes">G</italic>, resulting in four matrices 
                    <italic toggle="yes">HH</italic>, 
                    <italic toggle="yes">GH</italic>, 
                    <italic toggle="yes">HG</italic>, and 
                    <italic toggle="yes">GG</italic> each of size 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mfrac>
                                <mml:mi>n</mml:mi>
                                <mml:mn>2</mml:mn>
                            </mml:mfrac>
                            <mml:mo>&#x00d7;</mml:mo>
                            <mml:mfrac>
                                <mml:mi>n</mml:mi>
                                <mml:mn>2</mml:mn>
                            </mml:mfrac>
                        </mml:math>
                    </inline-formula>. These are our finest level coefficients. 
                    <italic toggle="yes">HH</italic> is the local average of the original matrix used for the next level procedure.</p>
            </sec>
            <sec id="sec4">
                <title>Factor genes</title>
                <p>Clustering and dimensionality reduction are widely used in genomics. In a gene-by-sample matrix, genes are often grouped into profiles, where genes from the same profile have a similar function. Statistically, we can treat them as correlated variables. We use the term factor genes for the principal components of our gene-by-sample data. Factor genes compose the linear combinations of genes. We consider each gene as a variable, and we aim to find variables (
                    <bold>
                        <italic toggle="yes">f</italic>
                    </bold>
                    <sub>1</sub>, &#x2026;, 
                    <bold>
                        <italic toggle="yes">f</italic>
                    </bold>
                    <italic toggle="yes">
                        <sub>K</sub>
                    </italic>) such that each gene 
                    <italic toggle="yes">g
                        <sub>i</sub>
                    </italic> has form 
                    <bold>
                        <italic toggle="yes">g</italic>
                    </bold>
                    <italic toggle="yes">
                        <sub>i</sub>
                    </italic> = 
                    <italic toggle="yes">a</italic>
                    <sub>1</sub>
                    <bold>
                        <italic toggle="yes">f</italic>
                    </bold>
                    <sub>1</sub> + 
                    <italic toggle="yes">a</italic>
                    <sub>2</sub>
                    <bold>
                        <italic toggle="yes">f</italic>
                    </bold>
                    <sub>2</sub> + &#x22ef; + 
                    <italic toggle="yes">a
                        <sub>K</sub>
                    </italic>
                    <bold>
                        <italic toggle="yes">f</italic>
                    </bold>
                    <italic toggle="yes">
                        <sub>K</sub>
                    </italic>, where 
                    <italic toggle="yes">a
                        <sub>j</sub>
                    </italic> are coefficients.</p>
                <p>Suppose we have a sample(
                    <italic toggle="yes">N</italic>)-by-gene(
                    <italic toggle="yes">P</italic>) matrix 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>A</mml:mi>
                            <mml:mo>&#x2208;</mml:mo>
                            <mml:msup>
                                <mml:mi mathvariant="normal">&#x211d;</mml:mi>
                                <mml:mrow>
                                    <mml:mi>N</mml:mi>
                                    <mml:mo>&#x00d7;</mml:mo>
                                    <mml:mi>P</mml:mi>
                                </mml:mrow>
                            </mml:msup>
                        </mml:math>
                    </inline-formula> with sample covariance matrix 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>S</mml:mi>
                            <mml:mo>=</mml:mo>
                            <mml:mfrac>
                                <mml:mn>1</mml:mn>
                                <mml:mi>N</mml:mi>
                            </mml:mfrac>
                            <mml:msup>
                                <mml:mi>&#x00c3;</mml:mi>
                                <mml:mi>T</mml:mi>
                            </mml:msup>
                            <mml:mi>&#x00c3;</mml:mi>
                        </mml:math>
                    </inline-formula>, where 
                    <italic toggle="yes">&#x00c3;</italic> is the column centered 
                    <italic toggle="yes">A.</italic> Consider the SVD on 
                    <italic toggle="yes">&#x00c3;</italic> = 
                    <italic toggle="yes">U</italic>&#x039b;
                    <italic toggle="yes">V
                        <sup>T</sup>.</italic> Then we have 
                    <italic toggle="yes">SV</italic> = 
                    <italic toggle="yes">V</italic>&#x039b;
                    <sup>2</sup>. The columns of 
                    <italic toggle="yes">V</italic> are called 
                    <italic toggle="yes">eigenarrays</italic> (
                    <xref ref-type="bibr" rid="ref32">Alter et al., 2000</xref>). The first 
                    <italic toggle="yes">K</italic> columns of 
                    <italic toggle="yes">Z</italic> = 
                    <italic toggle="yes">AV</italic> are the 
                    <italic toggle="yes">factor genes.</italic> The factor genes capture the mutual underlying information of genes.</p>
            </sec>
            <sec id="sec5">
                <title>Empirical Bayes matrix factorization (EBMF)</title>
                <p>Matrix factorization is often used to capture factor genes. We adopt a formulation similar to 
                    <xref ref-type="bibr" rid="ref33">Wang and Stephens (2021)</xref>: consider the factorization model on the observed sample-by-gene data 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>Y</mml:mi>
                            <mml:mo>&#x2208;</mml:mo>
                            <mml:msup>
                                <mml:mi mathvariant="normal">&#x211d;</mml:mi>
                                <mml:mrow>
                                    <mml:mi>N</mml:mi>
                                    <mml:mo>&#x00d7;</mml:mo>
                                    <mml:mi>P</mml:mi>
                                </mml:mrow>
                            </mml:msup>
                        </mml:math>
                    </inline-formula>
                    <disp-formula id="e4">
                        <mml:math display="block">
                            <mml:mi>Y</mml:mi>
                            <mml:mo>=</mml:mo>
                            <mml:msup>
                                <mml:mi mathvariant="italic">FL</mml:mi>
                                <mml:mi>T</mml:mi>
                            </mml:msup>
                            <mml:mo>+</mml:mo>
                            <mml:mi>E</mml:mi>
                        </mml:math>
                    </disp-formula>where 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>F</mml:mi>
                            <mml:mo>&#x2208;</mml:mo>
                            <mml:msup>
                                <mml:mi mathvariant="normal">&#x211d;</mml:mi>
                                <mml:mrow>
                                    <mml:mi>N</mml:mi>
                                    <mml:mo>&#x00d7;</mml:mo>
                                    <mml:mi>K</mml:mi>
                                </mml:mrow>
                            </mml:msup>
                        </mml:math>
                    </inline-formula> denotes the factors, 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>L</mml:mi>
                            <mml:mo>&#x2208;</mml:mo>
                            <mml:msup>
                                <mml:mi mathvariant="normal">&#x211d;</mml:mi>
                                <mml:mrow>
                                    <mml:mi>P</mml:mi>
                                    <mml:mo>&#x00d7;</mml:mo>
                                    <mml:mi>K</mml:mi>
                                </mml:mrow>
                            </mml:msup>
                        </mml:math>
                    </inline-formula> denotes the loadings for each factor, and 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>E</mml:mi>
                            <mml:mo>&#x2208;</mml:mo>
                            <mml:msup>
                                <mml:mi mathvariant="normal">&#x211d;</mml:mi>
                                <mml:mrow>
                                    <mml:mi>N</mml:mi>
                                    <mml:mo>&#x00d7;</mml:mo>
                                    <mml:mi>P</mml:mi>
                                </mml:mrow>
                            </mml:msup>
                        </mml:math>
                    </inline-formula> denotes Gaussian noise with zero mean. Typical formulations would treat factors and loadings as fixed effects and use Maximum Likelihood Estimation (MLE). In our high-dimensional setting, penalty-based regularizers are often considered. Considering a prior for 
                    <italic toggle="yes">L</italic> and 
                    <italic toggle="yes">F</italic> under a Bayesian setting has a similar effect and several advantages over adding a regularizer alone, such as simplifying hyperparameter search and selection of the number of factors 
                    <italic toggle="yes">K.</italic>
                </p>
                <p>One feature of empirical Bayes approaches in matrix factorization proposed by 
                    <xref ref-type="bibr" rid="ref34">Bishop (1999)</xref> is that the methods automatically select the number of factors 
                    <italic toggle="yes">K.</italic> In 
                    <xref ref-type="bibr" rid="ref33">Wang and Stephens (2021)</xref>, the authors add factors with priors one at a time, estimating the priors at each step until convergence. If the computed prior of the newly added factor is almost a point mass at 0, the algorithm eliminates this factor and returns.</p>
                <p>Following 
                    <xref ref-type="bibr" rid="ref33">Wang and Stephens (2021)</xref>, we consider 
                    <italic toggle="yes">Empirical Bayes</italic> in our setting, where we set a prior with unknown parameters of 
                    <italic toggle="yes">L</italic> and 
                    <italic toggle="yes">F.</italic> Empirical Bayes is not strictly Bayesian, since the prior parameters are estimated directly from the data by maximum likelihood. One example would be a normal distribution 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mi mathvariant="script">D</mml:mi>
                                <mml:mi>F</mml:mi>
                            </mml:msub>
                        </mml:math>
                    </inline-formula> of 
                    <italic toggle="yes">F</italic> and 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mi mathvariant="script">D</mml:mi>
                                <mml:mi>L</mml:mi>
                            </mml:msub>
                        </mml:math>
                    </inline-formula> of 
                    <italic toggle="yes">L</italic> where the coordinates are independent, which is a conjugate prior. We estimate the parameters in 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mi mathvariant="script">D</mml:mi>
                                <mml:mi>F</mml:mi>
                            </mml:msub>
                        </mml:math>
                    </inline-formula> and 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mi mathvariant="script">D</mml:mi>
                                <mml:mi>L</mml:mi>
                            </mml:msub>
                        </mml:math>
                    </inline-formula> by maximizing the marginal likelihood obtained by integrating out 
                    <italic toggle="yes">L</italic> and 
                    <italic toggle="yes">F.</italic> Then we compute the posterior distribution of 
                    <italic toggle="yes">L</italic> and 
                    <italic toggle="yes">F.</italic>
                </p>
            </sec>
        </sec>
        <sec id="sec6">
            <title>Problem setup</title>
            <p>Consider 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi>Y</mml:mi>
                        <mml:mo>&#x2208;</mml:mo>
                        <mml:msup>
                            <mml:mi mathvariant="normal">&#x211d;</mml:mi>
                            <mml:mrow>
                                <mml:mi>N</mml:mi>
                                <mml:mo>&#x00d7;</mml:mo>
                                <mml:mi>P</mml:mi>
                            </mml:mrow>
                        </mml:msup>
                    </mml:math>
                </inline-formula>, where each row represents a sample and each column represents a gene. Further, let 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi>S</mml:mi>
                        <mml:mo>&#x2208;</mml:mo>
                        <mml:msup>
                            <mml:mi mathvariant="normal">&#x211d;</mml:mi>
                            <mml:mrow>
                                <mml:mi>N</mml:mi>
                                <mml:mo>&#x00d7;</mml:mo>
                                <mml:mn>2</mml:mn>
                            </mml:mrow>
                        </mml:msup>
                    </mml:math>
                </inline-formula> store the spatial coordinates of each sample. We assume there are spatially-related genes among all genes. We assume the genes fall into several profiles. The genes in the same profile have the same expression over the sampled spatial context. For each profile, we can summarize the gene expression for that group by one representative factor. Supposing there are 
                <italic toggle="yes">K</italic> profiles, we can then write our model as:
                <disp-formula id="e5">
                    <mml:math display="block">
                        <mml:mtable columnalign="left" displaystyle="true">
                            <mml:mtr>
                                <mml:mtd>
                                    <mml:mi>Y</mml:mi>
                                </mml:mtd>
                                <mml:mtd>
                                    <mml:mo>=</mml:mo>
                                    <mml:munderover>
                                        <mml:mo movablelimits="false">&#x2211;</mml:mo>
                                        <mml:mrow>
                                            <mml:mi>k</mml:mi>
                                            <mml:mo>=</mml:mo>
                                            <mml:mn>1</mml:mn>
                                        </mml:mrow>
                                        <mml:mi>K</mml:mi>
                                    </mml:munderover>
                                    <mml:msub>
                                        <mml:mi mathvariant="bold-italic">f</mml:mi>
                                        <mml:mi>k</mml:mi>
                                    </mml:msub>
                                    <mml:msubsup>
                                        <mml:mi mathvariant="bold-italic">l</mml:mi>
                                        <mml:mi>k</mml:mi>
                                        <mml:mi>T</mml:mi>
                                    </mml:msubsup>
                                    <mml:mo>+</mml:mo>
                                    <mml:mi>E</mml:mi>
                                </mml:mtd>
                            </mml:mtr>
                            <mml:mtr>
                                <mml:mtd/>
                                <mml:mtd>
                                    <mml:mo>=</mml:mo>
                                    <mml:msup>
                                        <mml:mi mathvariant="italic">FL</mml:mi>
                                        <mml:mi>T</mml:mi>
                                    </mml:msup>
                                    <mml:mo>+</mml:mo>
                                    <mml:mi>E</mml:mi>
                                </mml:mtd>
                            </mml:mtr>
                        </mml:mtable>
                        <mml:mspace width="0.25em"/>
                    </mml:math>
                </disp-formula>where 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:msub>
                            <mml:mi>f</mml:mi>
                            <mml:mi>k</mml:mi>
                        </mml:msub>
                        <mml:mo>&#x2208;</mml:mo>
                        <mml:msup>
                            <mml:mi mathvariant="normal">&#x211d;</mml:mi>
                            <mml:mi>N</mml:mi>
                        </mml:msup>
                    </mml:math>
                </inline-formula> is the factor gene in profile 
                <italic toggle="yes">k</italic> capturing the gene expression pattern in that group, and 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:msub>
                            <mml:mi>l</mml:mi>
                            <mml:mi>k</mml:mi>
                        </mml:msub>
                        <mml:mo>&#x2208;</mml:mo>
                        <mml:msup>
                            <mml:mi mathvariant="normal">&#x211d;</mml:mi>
                            <mml:mi>P</mml:mi>
                        </mml:msup>
                    </mml:math>
                </inline-formula> indicates the loading coefficients of each factor. We assume there is random noise 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi>E</mml:mi>
                        <mml:mo>&#x2208;</mml:mo>
                        <mml:msup>
                            <mml:mi mathvariant="normal">&#x211d;</mml:mi>
                            <mml:mrow>
                                <mml:mi>N</mml:mi>
                                <mml:mo>&#x00d7;</mml:mo>
                                <mml:mi>P</mml:mi>
                            </mml:mrow>
                        </mml:msup>
                    </mml:math>
                </inline-formula> in observations and 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:msub>
                            <mml:mi>E</mml:mi>
                            <mml:mi mathvariant="italic">ij</mml:mi>
                        </mml:msub>
                        <mml:mover>
                            <mml:mo>&#x223c;</mml:mo>
                            <mml:mi>iid</mml:mi>
                        </mml:mover>
                        <mml:mi>N</mml:mi>
                        <mml:mfenced close=")" open="(" separators=",">
                            <mml:mn>0</mml:mn>
                            <mml:mrow>
                                <mml:mn>1</mml:mn>
                                <mml:mo>/</mml:mo>
                                <mml:mi>&#x03c4;</mml:mi>
                            </mml:mrow>
                        </mml:mfenced>
                    </mml:math>
                </inline-formula>. The goal is to select spatially-related genes based on spatial information in 
                <italic toggle="yes">S.</italic> We also extract the gene factors which capture information across all genes based on feature extraction. We aim to find a latent gene space that respects spatial structure.</p>
        </sec>
        <sec id="sec7" sec-type="methods">
            <title>Methods</title>
            <p>In this section, we build up pipelines and models to achieve gene dimensionality reduction while retaining spatial structure. We adapt matrix decomposition methods to incorporate spatial information. 
                <ext-link ext-link-type="uri" xlink:href="https://www.r-project.org/about.html">R</ext-link> is a statistical programming language and will be used for the analysis. We first preprocess gene expression to make them amenable for wavelet filtering. We then use the Daubechies D4 Wavelet Transform (
                <xref ref-type="bibr" rid="ref35">Daubechies, 1992</xref>) as wavelet filter with scale of wavelet 
                <italic toggle="yes">J</italic> = 5. We develop the package and share the workflow generating reproducible quantitative results and gene visualization. The package is available from 
                <italic toggle="yes">Software availability.</italic>
            </p>
            <sec id="sec8">
                <title>Gene expression over location</title>
                <p>We leverage a pre-processing step, which we call 
                    <italic toggle="yes">input generation</italic>, to combine spatial and expression data. We have both gene expression measurements and spatial coordinates for each sample. Each gene has an expression over each sample, and we can draw the samples on a 2D map using their coordinates.</p>
                <p>As shown in 
                    <xref ref-type="fig" rid="f3">Figure 2</xref>, the gene with the least sparsity in expression shows varying levels of expression across different spatial regions. Intuitively, we may consider the spatial expression pattern from this gene to be spatially related. However, most genes are sparse and do not show fluctuations in gene expression. Therefore, we first filter out those genes using a 
                    <monospace>kOverA</monospace> filter: we only keep genes that have an expression measure above 
                    <italic toggle="yes">A</italic> in at least 
                    <italic toggle="yes">k</italic> samples (
                    <xref ref-type="bibr" rid="ref36">Gentleman et al., 2021</xref>). This has the effect of removing genes that are rarely active, though it is possible that strongly expressed genes still show no spatial relationships.</p>
                <fig fig-type="figure" id="f3" orientation="portrait" position="float">
                    <label>Figure 2. </label>
                    <caption>
                        <title>The most dense gene has a structured expression pattern (authors' own visualization using ggplot2 package in 
                            <ext-link ext-link-type="uri" xlink:href="https://www.r-project.org/about.html">R</ext-link>).</title>
                    </caption>
                    <graphic id="gr2" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/134806/86da6507-272d-4572-b5ee-a93f96a263ce_figure2.gif"/>
                </fig>
                <p>We want gene expression data to have a neat grid structure that can be expressed as a matrix form. However, at first, expression measurements are roughly staggered over the spatial locations, as shown in 
                    <xref ref-type="fig" rid="f4">Figure 3</xref>. This is a consequence of the sampling strategy adopted by the 10x Genomics Visium platform. To obtain an evenly resampled version of the expression pattern, we divide the two-dimensional space into several partitions and compute the local average of each partition. Details are shown in 
                    <xref ref-type="boxed-text" rid="B1">Algorithm 1</xref>. Each gene has an expression over the grid and we now can use a matrix 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>Z</mml:mi>
                            <mml:mo>&#x2208;</mml:mo>
                            <mml:msup>
                                <mml:mi mathvariant="normal">&#x211d;</mml:mi>
                                <mml:mrow>
                                    <mml:msup>
                                        <mml:mi>D</mml:mi>
                                        <mml:mn>2</mml:mn>
                                    </mml:msup>
                                    <mml:mo>&#x00d7;</mml:mo>
                                    <mml:mi>P</mml:mi>
                                </mml:mrow>
                            </mml:msup>
                        </mml:math>
                    </inline-formula> to represent it. The matrix 
                    <italic toggle="yes">Z</italic> becomes the new input for analysis. We then obtain the same formulation as in Section Problem Setup, where we take 
                    <italic toggle="yes">N</italic> = 
                    <italic toggle="yes">D</italic>
                    <sup>2</sup> and each location corresponds to a new observation. We have 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mi>f</mml:mi>
                                <mml:mi>k</mml:mi>
                            </mml:msub>
                            <mml:mo>&#x2208;</mml:mo>
                            <mml:msup>
                                <mml:mi mathvariant="normal">&#x211d;</mml:mi>
                                <mml:msup>
                                    <mml:mi>D</mml:mi>
                                    <mml:mn>2</mml:mn>
                                </mml:msup>
                            </mml:msup>
                            <mml:mo>,</mml:mo>
                            <mml:mi>k</mml:mi>
                            <mml:mo>=</mml:mo>
                            <mml:mn>1</mml:mn>
                            <mml:mo>,</mml:mo>
                            <mml:mo>&#x2026;</mml:mo>
                            <mml:mo>,</mml:mo>
                            <mml:mi>K</mml:mi>
                        </mml:math>
                    </inline-formula> as the factor genes, computed by vectorizing the gene expression matrix.</p>
                <fig fig-type="figure" id="f4" orientation="portrait" position="float">
                    <label>Figure 3. </label>
                    <caption>
                        <title>Left: The gene expression over scattered samples over locations. Right: The local average of gene expression over grid locations, used as input to our algorithms (authors' own visualization using ggplot2 package in 
                            <ext-link ext-link-type="uri" xlink:href="https://www.r-project.org/about.html">R</ext-link>).</title>
                    </caption>
                    <graphic id="gr3" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/134806/86da6507-272d-4572-b5ee-a93f96a263ce_figure3.gif"/>
                </fig>
                <p>The choice of 
                    <italic toggle="yes">D</italic> depends on the wavelet scale used in the next section. We choose 
                    <italic toggle="yes">D</italic> to be dyadic to prevent handling edge cases in the wavelet transformation (recalling Subsection Wavelet Transformation). The dyadic size is also a natural choice in image analysis. In particular, we choose 
                    <italic toggle="yes">D</italic> = 2
                    <italic toggle="yes">
                        <sup>J</sup>
                    </italic>, where 
                    <italic toggle="yes">J</italic> is the level of scale in the wavelet transformation. Larger 
                    <italic toggle="yes">J</italic> allows finer recovery, but also requires more parameters and may result in overfitting.</p>
            </sec>
            <sec id="sec9">
                <title>Wavelet transformation and shrinkage</title>
                <p>We apply a wavelet transformation with shrinkage to denoise the gene expression matrix and smooth observed spatial expression patterns. In simulations, we find that this technique gives more accurate recovery as the signal-to-noise ratio decreases and yields less noisy visualizations for the spatially-related genes.</p>
                <p>We implement wavelet transformation and shrinkage on the processed data matrix 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>Z</mml:mi>
                            <mml:mo>&#x2208;</mml:mo>
                            <mml:msup>
                                <mml:mi mathvariant="normal">&#x211d;</mml:mi>
                                <mml:mrow>
                                    <mml:msup>
                                        <mml:mi>D</mml:mi>
                                        <mml:mn>2</mml:mn>
                                    </mml:msup>
                                    <mml:mo>&#x00d7;</mml:mo>
                                    <mml:mi>P</mml:mi>
                                </mml:mrow>
                            </mml:msup>
                        </mml:math>
                    </inline-formula>. Each column of 
                    <italic toggle="yes">Z</italic> is a vectorized expression matrix. For each column, we first reshape it into a 
                    <italic toggle="yes">D</italic> &#x00d7; 
                    <italic toggle="yes">D</italic> matrix and apply the wavelet transformation. We have a coefficient list with 
                    <italic toggle="yes">W</italic> coefficients associated with each gene. Then we conduct wavelet shrinkage on the coefficient list using the threshold strategies described in Subsection Wavelet Transformation. Finally, we vectorize the coefficient list of each gene into a length 
                    <italic toggle="yes">W</italic> vector for each gene. Stacking all vectors together, we have a coefficient matrix 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mi>C</mml:mi>
                            <mml:mo>&#x2208;</mml:mo>
                            <mml:msup>
                                <mml:mi mathvariant="normal">&#x211d;</mml:mi>
                                <mml:mrow>
                                    <mml:mi>W</mml:mi>
                                    <mml:mo>&#x00d7;</mml:mo>
                                    <mml:mi>P</mml:mi>
                                </mml:mrow>
                            </mml:msup>
                        </mml:math>
                    </inline-formula>. Note that the wavelet scale is specified by the size 
                    <italic toggle="yes">D</italic> of the input matrix, i.e., we have 
                    <italic toggle="yes">D</italic> = 2
                    <italic toggle="yes">
                        <sup>J</sup>
                    </italic>, where 
                    <italic toggle="yes">J</italic> is the scale. The details are specified in 
                    <xref ref-type="boxed-text" rid="B2">Algorithm 2</xref>.</p>
            </sec>
            <sec id="sec10">
                <title>Matrix decomposition</title>
                <p>The matrix of the shrunk wavelet coefficients 
                    <italic toggle="yes">C</italic> gives a summary of the denoised matrix. This allows improved reconstruction of gene expression data. To obtain a low-dimensional approximation, we apply matrix factorization on the coefficient matrix 
                    <italic toggle="yes">C</italic> after transformation. We use the same notation in Section Problem Setup, with the subscript 
                    <italic toggle="yes">c</italic> to denote the decomposition on coefficient matrix 
                    <italic toggle="yes">C.</italic>
                </p>
                <p>The resulting singular vectors can be used to estimate spatially structured factor genes (Subsection Factor Gene). We use SVD as a frequentist approach to estimating 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mover accent="true">
                                <mml:msub>
                                    <mml:mi>F</mml:mi>
                                    <mml:mi>c</mml:mi>
                                </mml:msub>
                                <mml:mo stretchy="true">&#x0302;</mml:mo>
                            </mml:mover>
                        </mml:math>
                    </inline-formula> and 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mover accent="true">
                                    <mml:mi>L</mml:mi>
                                    <mml:mo stretchy="true">&#x0302;</mml:mo>
                                </mml:mover>
                                <mml:mi>c</mml:mi>
                            </mml:msub>
                        </mml:math>
                    </inline-formula>. We also conduct EBMF (Section Empirical Bayes Matrix Factorization), using the posterior expectation of 
                    <italic toggle="yes">F
                        <sub>c</sub>
                    </italic> and 
                    <italic toggle="yes">L
                        <sub>c</sub>
                    </italic> to estimate 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mover accent="true">
                                <mml:msub>
                                    <mml:mi>F</mml:mi>
                                    <mml:mi>c</mml:mi>
                                </mml:msub>
                                <mml:mo stretchy="true">&#x0302;</mml:mo>
                            </mml:mover>
                        </mml:math>
                    </inline-formula> and 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mover accent="true">
                                <mml:msub>
                                    <mml:mi>L</mml:mi>
                                    <mml:mi>c</mml:mi>
                                </mml:msub>
                                <mml:mo stretchy="true">&#x0302;</mml:mo>
                            </mml:mover>
                        </mml:math>
                    </inline-formula>. EBMF can select the number of factors 
                    <italic toggle="yes">K</italic> by itself, whereas 
                    <italic toggle="yes">K</italic> must be manually specified in the SVD. The choice of 
                    <italic toggle="yes">K</italic> is informed by inspecting the scree plot, as in spectral clustering and PCA. We choose the number of factors at the point where the current factors explain sufficient information and the diminishing returns of additional factors are no longer worth the additional cost. Given 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mover accent="true">
                                    <mml:mi>F</mml:mi>
                                    <mml:mo stretchy="true">&#x0302;</mml:mo>
                                </mml:mover>
                                <mml:mi>c</mml:mi>
                            </mml:msub>
                        </mml:math>
                    </inline-formula> and 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mover accent="true">
                                    <mml:mi>L</mml:mi>
                                    <mml:mo stretchy="true">&#x0302;</mml:mo>
                                </mml:mover>
                                <mml:mi>c</mml:mi>
                            </mml:msub>
                        </mml:math>
                    </inline-formula>, we can compute the estimated coefficient matrix as 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mover accent="true">
                                <mml:mi>C</mml:mi>
                                <mml:mo stretchy="true">&#x0302;</mml:mo>
                            </mml:mover>
                            <mml:mo>=</mml:mo>
                            <mml:msub>
                                <mml:mover accent="true">
                                    <mml:mi>F</mml:mi>
                                    <mml:mo stretchy="true">&#x0302;</mml:mo>
                                </mml:mover>
                                <mml:mi>c</mml:mi>
                            </mml:msub>
                            <mml:msubsup>
                                <mml:mover accent="true">
                                    <mml:mi>L</mml:mi>
                                    <mml:mo stretchy="true">&#x0302;</mml:mo>
                                </mml:mover>
                                <mml:mi>c</mml:mi>
                                <mml:mi>T</mml:mi>
                            </mml:msubsup>
                        </mml:math>
                    </inline-formula>.</p>
            </sec>
            <sec id="sec11">
                <title>Inverse wavelet transformation</title>
                <p>To transfer the coefficient matrix 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mover accent="true">
                                <mml:mi>C</mml:mi>
                                <mml:mo stretchy="true">&#x0302;</mml:mo>
                            </mml:mover>
                        </mml:math>
                    </inline-formula> back to the estimated location-by-gene matrix 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mover accent="true">
                                <mml:mi>Z</mml:mi>
                                <mml:mo stretchy="true">&#x0302;</mml:mo>
                            </mml:mover>
                        </mml:math>
                    </inline-formula>, we apply the inverse wavelet transformation on columns of 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mover accent="true">
                                <mml:mi>C</mml:mi>
                                <mml:mo stretchy="true">&#x0302;</mml:mo>
                            </mml:mover>
                        </mml:math>
                    </inline-formula>, each of which is a vectorized coefficient list for one gene. This results in a 
                    <italic toggle="yes">D</italic> &#x00d7; 
                    <italic toggle="yes">D</italic> expression matrix for each gene. Then we vectorize the matrix and stack all vectors together. This yields the reconstructed matrix 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mover accent="true">
                                <mml:mi>Z</mml:mi>
                                <mml:mo stretchy="true">&#x0302;</mml:mo>
                            </mml:mover>
                        </mml:math>
                    </inline-formula>. Details are given in 
                    <xref ref-type="boxed-text" rid="B3">Algorithm 3</xref>. For visualization, we also conduct a similar process for each column of 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mover accent="true">
                                <mml:msub>
                                    <mml:mi>F</mml:mi>
                                    <mml:mi>c</mml:mi>
                                </mml:msub>
                                <mml:mo stretchy="true">&#x0302;</mml:mo>
                            </mml:mover>
                        </mml:math>
                    </inline-formula>. In this case, each column is associated with a factor gene. By applying the inverse wavelet transformation to each factor gene, we can build spatial gene expression matrices 
                    <italic toggle="yes">M</italic>
                    <sub>1</sub>, &#x2026;, 
                    <italic toggle="yes">M
                        <sub>K</sub>
                    </italic> representing gene factors.</p>
                <boxed-text id="B1" orientation="portrait" position="float">
                    <label>Algorithm 1. </label>
                    <caption>
                        <title>Input generation.</title>
                    </caption>
                    <p>
                        <bold>Require</bold> Sample by gene matrix 
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:mi>Y</mml:mi>
                                <mml:mo>&#x2208;</mml:mo>
                                <mml:msup>
                                    <mml:mi mathvariant="normal">&#x211d;</mml:mi>
                                    <mml:mrow>
                                        <mml:mi>N</mml:mi>
                                        <mml:mo>&#x00d7;</mml:mo>
                                        <mml:mi>P</mml:mi>
                                    </mml:mrow>
                                </mml:msup>
                            </mml:math>
                        </inline-formula>, spatial matrix 
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:mi>S</mml:mi>
                                <mml:mo>&#x2208;</mml:mo>
                                <mml:msup>
                                    <mml:mi mathvariant="normal">&#x211d;</mml:mi>
                                    <mml:mrow>
                                        <mml:mi>N</mml:mi>
                                        <mml:mo>&#x00d7;</mml:mo>
                                        <mml:mn>2</mml:mn>
                                    </mml:mrow>
                                </mml:msup>
                            </mml:math>
                        </inline-formula>, size of gene expression matrix 
                        <italic toggle="yes">D</italic>
                    </p>
                    <p>&#x2003;1: Compute the range of 
                        <italic toggle="yes">x</italic>, 
                        <italic toggle="yes">y</italic> coordinates from 
                        <italic toggle="yes">S</italic>, then compute the coordinates of the vertices of the big rectangle map 
                        <italic toggle="yes">B</italic> that covers all 
                        <italic toggle="yes">N</italic> samples spatially.</p>
                    <p>&#x2003;2: Partition interval 
                        <italic toggle="yes">x</italic> and interval 
                        <italic toggle="yes">y</italic> into 
                        <italic toggle="yes">D</italic> equal-length intervals, which together give 
                        <italic toggle="yes">D</italic>
                        <sup>2</sup> partitions over rectangle 
                        <italic toggle="yes">B.</italic>
                    </p>
                    <p>&#x2003;3: 
                        <bold>while</bold> 
                        <italic toggle="yes">i</italic> &#x2264; 
                        <italic toggle="yes">P</italic> 
                        <bold>do</bold>&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;&#x22b3; Consider gene 
                        <italic toggle="yes">i</italic> expression over map 
                        <italic toggle="yes">B</italic>
                    </p>
                    <p>&#x2003;4:&#x2003;&#x2003;Select 
                        <italic toggle="yes">i</italic>-th column of gene matrix 
                        <italic toggle="yes">Y</italic>
                    </p>
                    <p>&#x2003;5:&#x2003;&#x2003;Compute the local average of gene expression of gene 
                        <italic toggle="yes">i</italic> in each partition</p>
                    <p>&#x2003;6:&#x2003;&#x2003;Get 
                        <italic toggle="yes">D</italic>
                        <sup>2</sup> matrix 
                        <italic toggle="yes">G
                            <sub>i</sub>
                        </italic> as gene expression of gene 
                        <italic toggle="yes">i</italic>
                    </p>
                    <p>&#x2003;7:&#x2003;&#x2003;Vectorize 
                        <italic toggle="yes">G
                            <sub>i</sub>
                        </italic> into a vector 
                        <italic toggle="yes">g
                            <sub>i</sub>
                        </italic> with length 
                        <italic toggle="yes">D</italic>
                        <sup>2</sup>
                    </p>
                    <p>&#x2003;8: 
                        <bold>end while</bold>
                    </p>
                    <p>&#x2003;9: Stacking all 
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:msubsup>
                                    <mml:mfenced close="}" open="{">
                                        <mml:msub>
                                            <mml:mi>g</mml:mi>
                                            <mml:mi>i</mml:mi>
                                        </mml:msub>
                                    </mml:mfenced>
                                    <mml:mrow>
                                        <mml:mi>i</mml:mi>
                                        <mml:mo>=</mml:mo>
                                        <mml:mn>1</mml:mn>
                                    </mml:mrow>
                                    <mml:mi>P</mml:mi>
                                </mml:msubsup>
                            </mml:math>
                        </inline-formula> together into matrix 
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:mi>Z</mml:mi>
                                <mml:mo>&#x2208;</mml:mo>
                                <mml:msup>
                                    <mml:mi mathvariant="normal">&#x211d;</mml:mi>
                                    <mml:mrow>
                                        <mml:msup>
                                            <mml:mi>D</mml:mi>
                                            <mml:mn>2</mml:mn>
                                        </mml:msup>
                                        <mml:mo>&#x00d7;</mml:mo>
                                        <mml:mi>P</mml:mi>
                                    </mml:mrow>
                                </mml:msup>
                            </mml:math>
                        </inline-formula>
                    </p>
                    <p>&#x2003;10: 
                        <bold>return</bold> matrix 
                        <italic toggle="yes">Z</italic>&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;&#x22b3; A transformed gene expression matrix</p>
                </boxed-text>
                <boxed-text id="B2" orientation="portrait" position="float">
                    <label>Algorithm 2. </label>
                    <caption>
                        <title>Wavelet transformation and shrinkage.</title>
                    </caption>
                    <p>
                        <bold>Require</bold> Location by gene matrix 
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:mi>Z</mml:mi>
                                <mml:mo>&#x2208;</mml:mo>
                                <mml:msup>
                                    <mml:mi mathvariant="normal">&#x211d;</mml:mi>
                                    <mml:mrow>
                                        <mml:msup>
                                            <mml:mi>D</mml:mi>
                                            <mml:mn>2</mml:mn>
                                        </mml:msup>
                                        <mml:mo>&#x00d7;</mml:mo>
                                        <mml:mi>P</mml:mi>
                                    </mml:mrow>
                                </mml:msup>
                            </mml:math>
                        </inline-formula>, threshold method, optional threshold parameter 
                        <italic toggle="yes">&#x03c4;.</italic>&#x2003;&#x2003;&#x2003;&#x22b3; Apply threshold on wavelet coefficient if threshold method is specified</p>
                    <p>&#x2003;1: 
                        <bold>while</bold> 
                        <italic toggle="yes">i</italic> &#x2264; 
                        <italic toggle="yes">P</italic> 
                        <bold>do</bold>&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;&#x22b3; Consider gene 
                        <italic toggle="yes">i</italic> expression as matrix 
                        <italic toggle="yes">G
                            <sub>i</sub>
                        </italic>
                    </p>
                    <p>&#x2003;2:&#x2003;&#x2003;Select 
                        <italic toggle="yes">i</italic>
                        <sup>th</sup> 
                        <italic toggle="yes">g
                            <sub>i</sub>
                        </italic> column of gene matrix 
                        <italic toggle="yes">Z</italic>
                    </p>
                    <p>&#x2003;3:&#x2003;&#x2003;Form 
                        <italic toggle="yes">g
                            <sub>i</sub>
                        </italic> into expression matrix 
                        <italic toggle="yes">G
                            <sub>i</sub>
                        </italic> with size 
                        <italic toggle="yes">D</italic>
                        <sup>2</sup>
                    </p>
                    <p>&#x2003;4:&#x2003;&#x2003;Apply 2-D discrete wavelet transformation over 
                        <italic toggle="yes">G
                            <sub>i</sub>
                        </italic>, get coefficient list 
                        <italic toggle="yes">C
                            <sub>i</sub>
                        </italic>, with number of coefficients 
                        <italic toggle="yes">W</italic>
                    </p>
                    <p>&#x2003;5:&#x2003;&#x2003;Apply wavelet shrinkage over 
                        <italic toggle="yes">C
                            <sub>i</sub>
                        </italic> with optional parameter 
                        <italic toggle="yes">&#x03c4;</italic>
                    </p>
                    <p>&#x2003;6:&#x2003;&#x2003;Vectorize 
                        <italic toggle="yes">C
                            <sub>i</sub>
                        </italic> into a long vector 
                        <italic toggle="yes">c
                            <sub>i</sub>
                        </italic> with length 
                        <italic toggle="yes">W</italic>
                    </p>
                    <p>&#x2003;7: 
                        <bold>end while</bold>
                    </p>
                    <p>&#x2003;8: Stacking all 
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:msubsup>
                                    <mml:mfenced close="}" open="{">
                                        <mml:msub>
                                            <mml:mi>c</mml:mi>
                                            <mml:mi>i</mml:mi>
                                        </mml:msub>
                                    </mml:mfenced>
                                    <mml:mrow>
                                        <mml:mi>i</mml:mi>
                                        <mml:mo>=</mml:mo>
                                        <mml:mn>1</mml:mn>
                                    </mml:mrow>
                                    <mml:mi>P</mml:mi>
                                </mml:msubsup>
                            </mml:math>
                        </inline-formula> together into matrix 
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:mi>C</mml:mi>
                                <mml:mo>&#x2208;</mml:mo>
                                <mml:msup>
                                    <mml:mi mathvariant="normal">&#x211d;</mml:mi>
                                    <mml:mrow>
                                        <mml:mi>W</mml:mi>
                                        <mml:mo>&#x00d7;</mml:mo>
                                        <mml:mi>P</mml:mi>
                                    </mml:mrow>
                                </mml:msup>
                            </mml:math>
                        </inline-formula>
                    </p>
                    <p>&#x2003;9: 
                        <bold>return</bold> coefficient matrix 
                        <italic toggle="yes">C</italic>&#x2003;&#x2003;&#x2002;&#x2003;&#x2003;&#x22b3; Column 
                        <italic toggle="yes">i</italic> of 
                        <italic toggle="yes">C</italic> stores the coefficients from gene 
                        <italic toggle="yes">i</italic>
                    </p>
                </boxed-text>
                <boxed-text id="B3" orientation="portrait" position="float">
                    <label>Algorithm 3. </label>
                    <caption>
                        <title>Inverse wavelet transformation.</title>
                    </caption>
                    <p>
                        <bold>Require:</bold> (reconstructed) Coefficient matrix 
                        <italic toggle="yes">C</italic>
                    </p>
                    <p>&#x2003;1: 
                        <bold>while</bold> 
                        <italic toggle="yes">i</italic> &#x2264; 
                        <italic toggle="yes">P</italic> 
                        <bold>do</bold>
                    </p>
                    <p>&#x2003;2:&#x2003;&#x2003;Select 
                        <italic toggle="yes">i</italic>
                        <sup>th</sup> 
                        <italic toggle="yes">c
                            <sub>i</sub>
                        </italic> column of gene matrix 
                        <italic toggle="yes">C</italic>
                    </p>
                    <p>&#x2003;3:&#x2003;&#x2003;Form 
                        <italic toggle="yes">c
                            <sub>i</sub>
                        </italic> into coefficient list 
                        <italic toggle="yes">C
                            <sub>i</sub>
                        </italic> with number of coefficients 
                        <italic toggle="yes">W</italic>
                    </p>
                    <p>&#x2003;4:&#x2003;&#x2003;Apply 2-D inverse wavelet transformation over 
                        <italic toggle="yes">C
                            <sub>i</sub>
                        </italic>, get post-wavelet expression matrix 
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:mover accent="true">
                                    <mml:msub>
                                        <mml:mi>G</mml:mi>
                                        <mml:mi>i</mml:mi>
                                    </mml:msub>
                                    <mml:mo stretchy="true">&#x0302;</mml:mo>
                                </mml:mover>
                            </mml:math>
                        </inline-formula>
                    </p>
                    <p>&#x2003;5:&#x2003;&#x2003;Vectorize 
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:mover accent="true">
                                    <mml:msub>
                                        <mml:mi>G</mml:mi>
                                        <mml:mi>i</mml:mi>
                                    </mml:msub>
                                    <mml:mo stretchy="true">&#x0302;</mml:mo>
                                </mml:mover>
                            </mml:math>
                        </inline-formula> into a long vector 
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:mover accent="true">
                                    <mml:msub>
                                        <mml:mi>g</mml:mi>
                                        <mml:mi>i</mml:mi>
                                    </mml:msub>
                                    <mml:mo stretchy="true">&#x0302;</mml:mo>
                                </mml:mover>
                            </mml:math>
                        </inline-formula> with length 
                        <italic toggle="yes">D</italic>
                        <sup>2</sup>
                    </p>
                    <p>&#x2003;6: 
                        <bold>end while</bold>
                    </p>
                    <p>&#x2003;7: Stacking all 
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:msubsup>
                                    <mml:mfenced close="}" open="{">
                                        <mml:mover accent="true">
                                            <mml:msub>
                                                <mml:mi>g</mml:mi>
                                                <mml:mi>i</mml:mi>
                                            </mml:msub>
                                            <mml:mo stretchy="true">&#x0302;</mml:mo>
                                        </mml:mover>
                                    </mml:mfenced>
                                    <mml:mrow>
                                        <mml:mi>i</mml:mi>
                                        <mml:mo>=</mml:mo>
                                        <mml:mn>1</mml:mn>
                                    </mml:mrow>
                                    <mml:mi>P</mml:mi>
                                </mml:msubsup>
                            </mml:math>
                        </inline-formula> together into matrix 
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:mover accent="true">
                                    <mml:mi>Z</mml:mi>
                                    <mml:mo stretchy="true">&#x0302;</mml:mo>
                                </mml:mover>
                                <mml:mo>&#x2208;</mml:mo>
                                <mml:msup>
                                    <mml:mi mathvariant="normal">&#x211d;</mml:mi>
                                    <mml:mrow>
                                        <mml:msup>
                                            <mml:mi>D</mml:mi>
                                            <mml:mn>2</mml:mn>
                                        </mml:msup>
                                        <mml:mo>&#x00d7;</mml:mo>
                                        <mml:mi>P</mml:mi>
                                    </mml:mrow>
                                </mml:msup>
                            </mml:math>
                        </inline-formula>
                    </p>
                    <p>&#x2003;8: 
                        <bold>return</bold> matrix 
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:mover accent="true">
                                    <mml:mi>Z</mml:mi>
                                    <mml:mo stretchy="true">&#x0302;</mml:mo>
                                </mml:mover>
                            </mml:math>
                        </inline-formula>&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;&#x22b3; Post processing reconstructed gene expression matrix</p>
                </boxed-text>
            </sec>
            <sec id="sec12">
                <title>Evaluation</title>
                <p>We evaluate our proposal both quantitatively and qualitatively. Our quantitative results measure the reconstruction error between the estimated 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mover accent="true">
                                <mml:mi>Z</mml:mi>
                                <mml:mo stretchy="true">&#x0302;</mml:mo>
                            </mml:mover>
                        </mml:math>
                    </inline-formula> and 
                    <italic toggle="yes">Z</italic> using the Frobenius norm 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msubsup>
                                <mml:mfenced close="&#x2016;" open="&#x2016;">
                                    <mml:mrow>
                                        <mml:mover accent="true">
                                            <mml:mi>Z</mml:mi>
                                            <mml:mo stretchy="true">&#x0302;</mml:mo>
                                        </mml:mover>
                                        <mml:mo>&#x2212;</mml:mo>
                                        <mml:mi>Z</mml:mi>
                                    </mml:mrow>
                                </mml:mfenced>
                                <mml:mi>F</mml:mi>
                                <mml:mn>2</mml:mn>
                            </mml:msubsup>
                        </mml:math>
                    </inline-formula>.</p>
                <p>This evaluation is also used for hyperparameter tuning, such as the scale 
                    <italic toggle="yes">J</italic> of the wavelet and the choice of wavelet thresholding method. Our qualitative result is given by visualization of the estimated factor gene expression matrices, with emphasis on capturing global spatial structure.</p>
                <p>We use cross-validation in computing reconstruction error and calculating gene-wise errors. This evaluation is helpful in selecting spatially related genes. We also found a simple connection between the gradient of gene expression and spatial contribution, discussed in Section Real Data Experiment. This discussion requires a measure of spatial expression smoothness. To this end, a simple step computing the fluctuation of gene expression would give a spatial gene selection that coincides with the reconstruction error selection. For calculating successive differences, consider 
                    <italic toggle="yes">Z</italic> as the input matrix and let 
                    <italic toggle="yes">&#x03b4;
                        <sub>jk</sub>
                    </italic> = (
                    <italic toggle="yes">z</italic>
                    <sub>
                        <italic toggle="yes">j</italic>, 
                        <italic toggle="yes">k</italic>+1</sub>&#x2212;
                    <italic toggle="yes">z
                        <sub>jk</sub>
                    </italic>)
                    <sup>2</sup>, 
                    <italic toggle="yes">k</italic> = 1, &#x2026;, 
                    <italic toggle="yes">D</italic>
                    <sup>2</sup>. Then we define the gradient by 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mo>&#x2211;</mml:mo>
                                <mml:mi mathvariant="italic">jk</mml:mi>
                            </mml:msub>
                            <mml:msub>
                                <mml:mi>&#x03b4;</mml:mi>
                                <mml:mi mathvariant="italic">jk</mml:mi>
                            </mml:msub>
                        </mml:math>
                    </inline-formula>.</p>
            </sec>
        </sec>
        <sec id="sec13">
            <title>Simulation</title>
            <p>We set up simulations to see whether representing spatial structure with a wavelet basis supports denoising and visualization of gene expression. We will see improved recovery in the low signal-to-noise ratio regime. Qualitatively, we also find factor gene visualizations to be more spatially consistent. We use the Daubechies D4 Wavelet Transform (
                <xref ref-type="bibr" rid="ref35">Daubechies, 1992</xref>) as wavelet filter with scale of wavelet 
                <italic toggle="yes">J</italic> = 5. We develop the package and share the workflow generating reproducible quantitative results and gene visualization. The package is available from 
                <italic toggle="yes">Software availability.</italic>
            </p>
            <p>We first describe the simulation mechanism. We set the number of factors to 
                <italic toggle="yes">K</italic> = 9. We generate 
                <italic toggle="yes">K</italic> gene expression matrices 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:msub>
                            <mml:mi>M</mml:mi>
                            <mml:mn>1</mml:mn>
                        </mml:msub>
                        <mml:mo>,</mml:mo>
                        <mml:mo>&#x2026;</mml:mo>
                        <mml:mo>,</mml:mo>
                        <mml:msub>
                            <mml:mi>M</mml:mi>
                            <mml:mi>K</mml:mi>
                        </mml:msub>
                        <mml:mo>&#x2208;</mml:mo>
                        <mml:msup>
                            <mml:mi mathvariant="normal">&#x211d;</mml:mi>
                            <mml:msup>
                                <mml:mi>D</mml:mi>
                                <mml:mn>2</mml:mn>
                            </mml:msup>
                        </mml:msup>
                    </mml:math>
                </inline-formula>, each representing a factor gene. These factor gene expression patterns are shown in 
                <xref ref-type="fig" rid="f5">Figure 4</xref>. We vectorize the patterns 
                <italic toggle="yes">M
                    <sub>k</sub>
                </italic> into factor genes 
                <italic toggle="yes">f
                    <sub>k</sub>
                </italic>, which are then scaled so &#x2225;
                <italic toggle="yes">f
                    <sub>k</sub>
                </italic>&#x2225; = 1. We obtain 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi>F</mml:mi>
                        <mml:mo>&#x2208;</mml:mo>
                        <mml:msup>
                            <mml:mi mathvariant="normal">&#x211d;</mml:mi>
                            <mml:mrow>
                                <mml:msup>
                                    <mml:mi>D</mml:mi>
                                    <mml:mn>2</mml:mn>
                                </mml:msup>
                                <mml:mo>&#x00d7;</mml:mo>
                                <mml:mi>K</mml:mi>
                            </mml:mrow>
                        </mml:msup>
                    </mml:math>
                </inline-formula> by stacking all the 
                <italic toggle="yes">f
                    <sub>k</sub>
                </italic> together. We generate loadings 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:msub>
                            <mml:mi>l</mml:mi>
                            <mml:mn>1</mml:mn>
                        </mml:msub>
                        <mml:mo>,</mml:mo>
                        <mml:mo>&#x2026;</mml:mo>
                        <mml:mo>,</mml:mo>
                        <mml:msub>
                            <mml:mi>l</mml:mi>
                            <mml:mi>K</mml:mi>
                        </mml:msub>
                        <mml:mo>&#x2208;</mml:mo>
                        <mml:msup>
                            <mml:mi mathvariant="normal">&#x211d;</mml:mi>
                            <mml:mi>P</mml:mi>
                        </mml:msup>
                    </mml:math>
                </inline-formula> by drawing coordinates independently from 
                <italic toggle="yes">N</italic>(0, 1). We set 
                <italic toggle="yes">D</italic> = 32 and wavelet scale 
                <italic toggle="yes">J</italic> = 5. We similarly stack 
                <italic toggle="yes">l
                    <sub>k</sub>
                </italic> to obtain 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi>L</mml:mi>
                        <mml:mo>&#x2208;</mml:mo>
                        <mml:msup>
                            <mml:mi mathvariant="normal">&#x211d;</mml:mi>
                            <mml:mrow>
                                <mml:mi>P</mml:mi>
                                <mml:mo>&#x00d7;</mml:mo>
                                <mml:mi>K</mml:mi>
                            </mml:mrow>
                        </mml:msup>
                    </mml:math>
                </inline-formula>. We use 
                <italic toggle="yes">Z</italic> = 
                <italic toggle="yes">FL
                    <sup>T</sup>
                </italic> as the ground-truth signal matrix and add noise 
                <italic toggle="yes">E</italic> to yield data matrix 
                <italic toggle="yes">Z
                    <sub>d</sub>
                </italic> = 
                <italic toggle="yes">FL
                    <sup>T</sup>
                </italic> + 
                <italic toggle="yes">E</italic>, where entries in 
                <italic toggle="yes">E</italic> are zero-mean normal noise that corrupt the underlying signal. In our analysis, we use a Daubechies D4 Wavelet as the wavelet filter, and for coefficient shrinkage we use hybrid thresholding.</p>
            <fig fig-type="figure" id="f5" orientation="portrait" position="float">
                <label>Figure 4. </label>
                <caption>
                    <title>The gene expression pattern for nine factor genes in the simulation experiment (authors' own visualization using ggplot2 package in 
                        <ext-link ext-link-type="uri" xlink:href="https://www.r-project.org/about.html">R</ext-link>).</title>
                </caption>
                <graphic id="gr4" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/134806/86da6507-272d-4572-b5ee-a93f96a263ce_figure4.gif"/>
            </fig>
            <p>We use the pipeline from Section Methods. The generated data 
                <italic toggle="yes">Z
                    <sub>d</sub>
                </italic> already reflects the structure produced by the processing of Subsection Gene Expression Over Location. For comparison, we also directly decompose the matrix 
                <italic toggle="yes">Z
                    <sub>d</sub>
                </italic> with the SVD without any wavelet transformation. We call the resulting factors 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:msub>
                            <mml:mover accent="true">
                                <mml:mi>F</mml:mi>
                                <mml:mo stretchy="true">&#x0302;</mml:mo>
                            </mml:mover>
                            <mml:mi mathvariant="italic">raw</mml:mi>
                        </mml:msub>
                    </mml:math>
                </inline-formula> and reconstruction 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:msub>
                            <mml:mover accent="true">
                                <mml:mi>Z</mml:mi>
                                <mml:mo stretchy="true">&#x0302;</mml:mo>
                            </mml:mover>
                            <mml:mi mathvariant="italic">raw</mml:mi>
                        </mml:msub>
                    </mml:math>
                </inline-formula>. For this baseline, we compute the same quantitative reconstruction error and produce the same qualitative visualization of the factor genes 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:msub>
                            <mml:mover accent="true">
                                <mml:mi>F</mml:mi>
                                <mml:mo stretchy="true">&#x0302;</mml:mo>
                            </mml:mover>
                            <mml:mi mathvariant="italic">raw</mml:mi>
                        </mml:msub>
                    </mml:math>
                </inline-formula>. We also measure the size of the gradients across the estimated gene expression matrix 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:msub>
                            <mml:mover accent="true">
                                <mml:mi>M</mml:mi>
                                <mml:mo stretchy="true">&#x0302;</mml:mo>
                            </mml:mover>
                            <mml:mi>i</mml:mi>
                        </mml:msub>
                    </mml:math>
                </inline-formula>. The gradient is computed by successive differences of neighboring image pixels. We calculate the sum of the squares of the gradient. This measurement shows whether the gene expression matrix has been smoothed. This property is of interest, since smoother estimates are often more visually appealing.</p>
            <p>We denote the signal-to-noise ratio as 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi>SNR</mml:mi>
                        <mml:mo>=</mml:mo>
                        <mml:mfrac>
                            <mml:mrow>
                                <mml:mi>sd</mml:mi>
                                <mml:mfenced close=")" open="(">
                                    <mml:mi>Z</mml:mi>
                                </mml:mfenced>
                            </mml:mrow>
                            <mml:mrow>
                                <mml:mi>sd</mml:mi>
                                <mml:mfenced close=")" open="(">
                                    <mml:mi>E</mml:mi>
                                </mml:mfenced>
                            </mml:mrow>
                        </mml:mfrac>
                    </mml:math>
                </inline-formula>, where 
                <italic toggle="yes">sd</italic> stands for standard deviation. Let 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi>r</mml:mi>
                        <mml:mo>=</mml:mo>
                        <mml:mfrac>
                            <mml:mn>1</mml:mn>
                            <mml:mi>SNR</mml:mi>
                        </mml:mfrac>
                    </mml:math>
                </inline-formula>. We specify 19 evenly spaced settings of 
                <italic toggle="yes">r</italic> from 1 to 10. For each setting, we run 100 replicates with different simulated 
                <italic toggle="yes">Z
                    <sub>d</sub>
                </italic> and apply wavelet and SVD-based dimensionality reduction. The resulting average errors across 
                <italic toggle="yes">r</italic> are shown in 
                <xref ref-type="fig" rid="f6">Figure 5</xref>. The wavelet and shrinkage technique has better performance when 
                <italic toggle="yes">r</italic> is larger than 5, i.e., in the low signal-to-noise ratio regime. The gradient of the gene expression matrix under the two methods is also shown in 
                <xref ref-type="fig" rid="f6">Figure 5</xref>. The wavelet and shrinkage approach smooths edges in the factor gene expression image, giving a more interpretable visualization and lower error in this low SNR setting.</p>
            <fig fig-type="figure" id="f6" orientation="portrait" position="float">
                <label>Figure 5. </label>
                <caption>
                    <title>The reconstruction error and gradient for the estimated gene-by-location matrix under different SNR (signal-to-noise ratio) settings.</title>
                    <p>The 
                        <italic toggle="yes">x</italic>-axis shows the ratio 
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:mfrac>
                                    <mml:mn>1</mml:mn>
                                    <mml:mi>SNR</mml:mi>
                                </mml:mfrac>
                            </mml:math>
                        </inline-formula>. The left subplot shows the gradient changes. The gradient of gene expression is always lower with the wavelet technique, a consequence of its smoothing property. The right subplot shows the error &#x2013; the wavelet method has lower error as the magnitude of noise increases. The underlying data can be found as simulation_data in the package from 
                        <italic toggle="yes">Software availability.</italic> SVD, singular value decomposition.</p>
                </caption>
                <graphic id="gr5" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/134806/86da6507-272d-4572-b5ee-a93f96a263ce_figure5.gif"/>
            </fig>
            <p>The factor genes are shown in 
                <xref ref-type="fig" rid="f7">Figure 6</xref>. The SVD without the wavelet technique has erratic, outlying pixels, especially in the last three genes. The visualization is sensitive to outliers. In contrast, the SVD combined with the wavelet transformation has smoother patterns. Like the SVD-based method, it appears to have mixed several of the true underlying factors in each of the recovered ones. Moreover, the sharp boundaries visible in the SVD factors become smoothed over in the wavelet decomposition. The wavelet method applies a decomposition in coefficient space after thresholding, while the SVD operates on individual pixels. The SVD captures more information, but also emphasizes nuisance information induced by errors. The wavelet method also reduces model complexity, improving estimation accuracy. Other non-parametric methods, such as Fourier transformation and Gaussian process-based methods, also operate in coefficient space. Still, they would struggle to capture sharp transitions, since their bases are smooth functions.</p>
            <fig fig-type="figure" id="f7" orientation="portrait" position="float">
                <label>Figure 6. </label>
                <caption>
                    <title>Factor gene visualization.</title>
                    <p>(a) SVD applied directly to 
                        <italic toggle="yes">Z
                            <sub>d</sub>
                        </italic>: visualization of the columns of 
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:msub>
                                    <mml:mover accent="true">
                                        <mml:mi>F</mml:mi>
                                        <mml:mo stretchy="true">&#x0302;</mml:mo>
                                    </mml:mover>
                                    <mml:mi mathvariant="italic">raw</mml:mi>
                                </mml:msub>
                            </mml:math>
                        </inline-formula>. (b) SVD applied to the coefficient matrix: visualization of the columns of 
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:mover accent="true">
                                    <mml:mi>F</mml:mi>
                                    <mml:mo stretchy="true">&#x0302;</mml:mo>
                                </mml:mover>
                            </mml:math>
                        </inline-formula>. We use 
                        <italic toggle="yes">Z</italic> = 
                        <italic toggle="yes">FL
                            <sup>T</sup>
                        </italic> as the ground-truth signal matrix and add noise 
                        <italic toggle="yes">E</italic> to yield data matrix 
                        <italic toggle="yes">Z
                            <sub>d</sub>
                        </italic> = 
                        <italic toggle="yes">FL
                            <sup>T</sup>
                        </italic> + 
                        <italic toggle="yes">E</italic>, where entries in 
                        <italic toggle="yes">E</italic> are zero-mean normal noise that corrupts the underlying signal. For comparison, we also directly decompose the matrix 
                        <italic toggle="yes">Z
                            <sub>d</sub>
                        </italic> with the SVD without any wavelet transformation. We call the resulting factors 
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:msub>
                                    <mml:mover accent="true">
                                        <mml:mi>F</mml:mi>
                                        <mml:mo stretchy="true">&#x0302;</mml:mo>
                                    </mml:mover>
                                    <mml:mi mathvariant="italic">raw</mml:mi>
                                </mml:msub>
                            </mml:math>
                        </inline-formula> and reconstruction 
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:msub>
                                    <mml:mover accent="true">
                                        <mml:mi>Z</mml:mi>
                                        <mml:mo stretchy="true">&#x0302;</mml:mo>
                                    </mml:mover>
                                    <mml:mi mathvariant="italic">raw</mml:mi>
                                </mml:msub>
                            </mml:math>
                        </inline-formula>. For this baseline, we compute the same quantitative reconstruction error and produce the same qualitative visualization of the factor genes 
                        <inline-formula>
                            <mml:math display="inline">
                                <mml:msub>
                                    <mml:mover accent="true">
                                        <mml:mi>F</mml:mi>
                                        <mml:mo stretchy="true">&#x0302;</mml:mo>
                                    </mml:mover>
                                    <mml:mi mathvariant="italic">raw</mml:mi>
                                </mml:msub>
                            </mml:math>
                        </inline-formula>. (authors' own visualization using the ggplot2 package in 
                        <ext-link ext-link-type="uri" xlink:href="https://www.r-project.org/about.html">R</ext-link>).</p>
                </caption>
                <graphic id="gr6" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/134806/86da6507-272d-4572-b5ee-a93f96a263ce_figure6.gif"/>
            </fig>
            <p>Nonetheless, both the SVD and wavelet-based visualizations reflect spatial trends in the true factor genes (in 
                <xref ref-type="fig" rid="f5">Figure 4</xref>).</p>
        </sec>
        <sec id="sec14">
            <title>Real data experiment</title>
            <p>In this section, we show that the wavelet and shrinkage technique reduces reconstruction error quantitatively. We ran our method on a public spatially resolved transcriptomics dataset (
                <xref ref-type="bibr" rid="ref24">Weber, 2021</xref>). The dataset can be accessed in R package 
                <ext-link ext-link-type="uri" xlink:href="https://bioconductor.org/packages/release/data/experiment/html/STexampleData.html">STexampleData</ext-link> with version 3.15. The dataset represents a single biological sample from the human brain dorsolateral prefrontal cortex (DLPFC) region, measured with the 10x Genomics Visium platform. Further, we identify a simple connection between the gradient of gene expression and quantitative error. A simple step computing the fluctuation of gene expression alone (calculating successive difference of the gene expression image) selects genes that have reduced reconstruction error when using wavelet-guided dimensionality reduction (
                <xref ref-type="bibr" rid="ref37">Zhuoyan, 2022</xref>).</p>
            <p>We first process the ST data through our pipeline. The ST data contains 4992 observations with 33538 genes. Expressions from most genes are sparse. We implement the pipeline from Section Methods. We pre-process as in Subsection Gene Expression Over Location and then apply a 
                <monospace>kOverA</monospace> filter to select genes. We find 
                <italic toggle="yes">k</italic> = 3, 
                <italic toggle="yes">A</italic> = 7 gives us 721 genes with an average expression of 2.71. Then we apply Algorithm 1 to transform the sample-by-gene matrix into a grid-by-gene matrix. We set 
                <italic toggle="yes">D</italic> = 64 and wavelet scale 
                <italic toggle="yes">J</italic> = 6. We obtain a heatmap image for each gene, as in 
                <xref ref-type="fig" rid="f8">Figure 7</xref>.</p>
            <fig fig-type="figure" id="f8" orientation="portrait" position="float">
                <label>Figure 7. </label>
                <caption>
                    <title>Gene expression over the grid (authors' own visualization using the ggplot2 package in 
                        <ext-link ext-link-type="uri" xlink:href="https://www.r-project.org/about.html">R</ext-link>).</title>
                </caption>
                <graphic id="gr7" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/134806/86da6507-272d-4572-b5ee-a93f96a263ce_figure7.gif"/>
            </fig>
            <p>We then vectorize 
                <italic toggle="yes">P</italic> = 721 expression images and stack the resulting vectors together, generating the input data 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi>Z</mml:mi>
                        <mml:mo>&#x2208;</mml:mo>
                        <mml:msup>
                            <mml:mi mathvariant="normal">&#x211d;</mml:mi>
                            <mml:mrow>
                                <mml:msup>
                                    <mml:mn>64</mml:mn>
                                    <mml:mn>2</mml:mn>
                                </mml:msup>
                                <mml:mo>&#x00d7;</mml:mo>
                                <mml:mn>721</mml:mn>
                            </mml:mrow>
                        </mml:msup>
                    </mml:math>
                </inline-formula>. We run our pipelines with and without wavelet transformation for evaluation. We first implement SVD and EBMF on 
                <italic toggle="yes">Z</italic> or on coefficient matrix 
                <italic toggle="yes">C</italic> after applying wavelet transformation and thresholding each column. We choose the number of factors 
                <italic toggle="yes">K</italic> by examining the singular values. If a singular value is larger than 500, we keep it as a factor. In EBMF, we set an upper bound requiring 
                <italic toggle="yes">K</italic> to be smaller than the corresponding 
                <italic toggle="yes">K</italic> in SVD, then let the algorithm choose 
                <italic toggle="yes">K</italic> itself.</p>
            <p>Quantitative evaluation is conducted by cross-validation on non-zero entries. We use 5-fold cross-validation with replacement. In particular, we randomly select 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mfrac>
                            <mml:mn>1</mml:mn>
                            <mml:mn>5</mml:mn>
                        </mml:mfrac>
                    </mml:math>
                </inline-formula> of the non-zero entries in matrix 
                <italic toggle="yes">Z</italic> and set them to zero, then we save the resulting matrix as the masked matrix (train data) 
                <italic toggle="yes">Z
                    <sub>train</sub>.</italic> We store the values and position of the masked entries as test data 
                <italic toggle="yes">Z
                    <sub>test</sub>.</italic> We then ran the methods on 
                <italic toggle="yes">Z
                    <sub>train</sub>.</italic> The results differ depending on whether the wavelet technique is used. The results from SVD and EBMF are close to each other coordinate-wise, hence we only show one of them in some results below. We include the comparison between SVD and EBMF in the Section Comparison between SVD and EBMF. As in Section Simulation, we have 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mover accent="true">
                            <mml:mi>F</mml:mi>
                            <mml:mo stretchy="true">&#x0302;</mml:mo>
                        </mml:mover>
                    </mml:math>
                </inline-formula> and 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mover accent="true">
                            <mml:mi>Z</mml:mi>
                            <mml:mo stretchy="true">&#x0302;</mml:mo>
                        </mml:mover>
                    </mml:math>
                </inline-formula> from the wavelet approach and 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:msub>
                            <mml:mover accent="true">
                                <mml:mi>F</mml:mi>
                                <mml:mo stretchy="true">&#x0302;</mml:mo>
                            </mml:mover>
                            <mml:mi mathvariant="italic">raw</mml:mi>
                        </mml:msub>
                    </mml:math>
                </inline-formula> and 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:msub>
                            <mml:mover accent="true">
                                <mml:mi>Z</mml:mi>
                                <mml:mo stretchy="true">&#x0302;</mml:mo>
                            </mml:mover>
                            <mml:mi mathvariant="italic">raw</mml:mi>
                        </mml:msub>
                    </mml:math>
                </inline-formula> on the original data.</p>
            <sec id="sec15">
                <title>Total error and parameter tuning</title>
                <p>We compute the reconstruction loss,
                    <disp-formula id="e6">
                        <mml:math display="block">
                            <mml:mfrac>
                                <mml:mn>1</mml:mn>
                                <mml:mi>N</mml:mi>
                            </mml:mfrac>
                            <mml:munder>
                                <mml:mo movablelimits="false">&#x2211;</mml:mo>
                                <mml:mrow>
                                    <mml:mi>i</mml:mi>
                                    <mml:mo>,</mml:mo>
                                    <mml:mi>j</mml:mi>
                                    <mml:mo>&#x2208;</mml:mo>
                                    <mml:mtext>test</mml:mtext>
                                </mml:mrow>
                            </mml:munder>
                            <mml:msup>
                                <mml:mfenced close=")" open="(">
                                    <mml:mrow>
                                        <mml:msub>
                                            <mml:mover accent="true">
                                                <mml:mi>Z</mml:mi>
                                                <mml:mo stretchy="true">&#x0302;</mml:mo>
                                            </mml:mover>
                                            <mml:mi mathvariant="italic">ij</mml:mi>
                                        </mml:msub>
                                        <mml:mo>&#x2212;</mml:mo>
                                        <mml:msub>
                                            <mml:mi>Z</mml:mi>
                                            <mml:mi mathvariant="italic">ij</mml:mi>
                                        </mml:msub>
                                    </mml:mrow>
                                </mml:mfenced>
                                <mml:mn>2</mml:mn>
                            </mml:msup>
                        </mml:math>
                    </disp-formula>where 
                    <italic toggle="yes">N</italic> is the number of test entries. We evaluate the loss only on test entries. We first tune parameters. We set up three settings: decompose the raw data with SVD or EBMF; wavelet transformation with hybrid thresholding; wavelet transformation with manual thresholding with threshold 
                    <italic toggle="yes">&#x03c4;</italic> = 10, 20, &#x2026;, 100. In each setting, we ran 100 replicates. The reconstruction error is shown in 
                    <xref ref-type="fig" rid="f9">Figure 8</xref>.</p>
                <fig fig-type="figure" id="f9" orientation="portrait" position="float">
                    <label>Figure 8. </label>
                    <caption>
                        <title>The reconstruction error for different wavelet parameters.</title>
                        <p>The upper plot contains the results from all settings; the leftmost 
                            <italic toggle="yes">&#x03c4;</italic> = &#x2212;1 result is the decomposition without wavelet transformation. 
                            <italic toggle="yes">&#x03c4;</italic> = 0 indicates the hybrid thresholding. The bottom plot zooms into the experiment under manual threshold for 
                            <inline-formula>
                                <mml:math display="inline">
                                    <mml:mi>&#x03c4;</mml:mi>
                                    <mml:mo>&#x2208;</mml:mo>
                                    <mml:mfenced close="]" open="[" separators=",">
                                        <mml:mn>10</mml:mn>
                                        <mml:mn>100</mml:mn>
                                    </mml:mfenced>
                                </mml:math>
                            </inline-formula>. SVD, singular value decomposition; EBMF, Empirical Bayes Matrix Factorization.</p>
                    </caption>
                    <graphic id="gr8" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/134806/86da6507-272d-4572-b5ee-a93f96a263ce_figure8.gif"/>
                </fig>
                <p>As shown in the upper panel of 
                    <xref ref-type="fig" rid="f9">Figure 8</xref>, wavelet thresholding with manually set 
                    <italic toggle="yes">&#x03c4;</italic> reduces error compared to decomposing raw data. The wavelet has the most positive effect when 
                    <italic toggle="yes">&#x03c4;</italic> = 40, as shown in the bottom panel. We use 
                    <italic toggle="yes">&#x03c4;</italic> = 40 in our following analysis.</p>
            </sec>
            <sec id="sec16">
                <title>Genewise error</title>
                <p>We then evaluate how method performance varies for each gene by calculating the genewise reconstruction error. This reveals genes whose strong spatial expression structure leads to improved performance when using a wavelet basis. We still hold out 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mfrac>
                                <mml:mn>1</mml:mn>
                                <mml:mn>5</mml:mn>
                            </mml:mfrac>
                        </mml:math>
                    </inline-formula> of the entries at random as a test set. We compute the reconstruction error of test entries on each column of 
                    <inline-formula>
                        <mml:math display="inline">
                            <mml:mover accent="true">
                                <mml:mi>Z</mml:mi>
                                <mml:mo stretchy="true">&#x0302;</mml:mo>
                            </mml:mover>
                            <mml:mo>&#x2212;</mml:mo>
                            <mml:mi>Z</mml:mi>
                        </mml:math>
                    </inline-formula>. We calculate entry-wise loss across 100 replicates and estimate the average loss. We compare genewise errors with and without wavelet transformation. The two decomposition methods, SVD and EBMF, show the same result. We only show the result of EBMF here, in 
                    <xref ref-type="fig" rid="f10">Figure 9</xref>. We show the result of SVD in 
                    <xref ref-type="fig" rid="f14">Figure 13b</xref>.</p>
                <fig fig-type="figure" id="f10" orientation="portrait" position="float">
                    <label>Figure 9. </label>
                    <caption>
                        <title>The reconstruction error for each gene. The 
                            <italic toggle="yes">x</italic>-axis is the error from EBMF on raw data, the 
                            <italic toggle="yes">y</italic>-axis is the error from the combination of wavelet thresholding and EBMF.</title>
                    </caption>
                    <graphic id="gr9" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/134806/86da6507-272d-4572-b5ee-a93f96a263ce_figure9.gif"/>
                </fig>
                <p>As we can see, most of the genes have lower reconstruction error when directly applying the SVD or EBMF. However, for some genes, wavelet smoothing reduces error. For example, this is seen in genes 712, 713, 716, 719, 715, 710, 711, 721, 717, 596, 373, and 289. We conjecture that these genes have acute fluctuations as well as clear spatial patterns. The expression matrix of these genes would have a larger gradient and distinct edges. To verify our conjecture, we calculated the sum of squares of the gradient of each gene expression matrix. We show the result in 
                    <xref ref-type="fig" rid="f11">Figure 10</xref>.</p>
                <fig fig-type="figure" id="f11" orientation="portrait" position="float">
                    <label>Figure 10. </label>
                    <caption>
                        <title>The sum of squares of the gradient for each gene.</title>
                        <p>The 
                            <italic toggle="yes">y</italic>-axis is the index of each gene. Each bar&#x2019;s color shows whether that gene has better performance when using a wavelet basis. The M on the 
                            <italic toggle="yes">x</italic>-axis stands for &#x201c;millions&#x201d;. EBMF, Empirical Bayes Matrix Factorization.</p>
                    </caption>
                    <graphic id="gr10" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/134806/86da6507-272d-4572-b5ee-a93f96a263ce_figure10.gif"/>
                </fig>
                <p>The result verifies our conjecture: genes with larger gradients have better reconstruction under the wavelet-guided decomposition. This suggests a pre-processing step for selecting wavelet-suited genes by calculating the gradient of each gene. The spatially related genes would have a lower reconstruction error. Among these spatially related genes, one possibility is that we can divide them into two groups, one for decomposition directly and the other for the wavelet technique.</p>
            </sec>
            <sec id="sec17">
                <title>Factor genes</title>
                <p>Now we visualize the top factor genes and genes with high loadings on these factor genes. In 
                    <xref ref-type="fig" rid="f12">Figure 11a</xref>, we decompose 
                    <italic toggle="yes">Z</italic> using the SVD and plot the first factor gene and the matrix slices of the genes with the five largest loadings on that factor. The factor gene captures the same patterns as the original genes. To improve visualization, we find the analogous wavelet-based factors (we use manual thresholding with 
                    <italic toggle="yes">&#x03c4;</italic> = 40), shown in 
                    <xref ref-type="fig" rid="f12">Figure 11b</xref>.</p>
                <fig fig-type="figure" id="f12" orientation="portrait" position="float">
                    <label>Figure 11. </label>
                    <caption>
                        <title>The top left panel is the first factor gene; the following panels, by row, are the genes with the largest loadings on the first factor gene: genes 712, 713, 716, 719, 715 (authors' own visualization using the ggplot2 package in 
                            <ext-link ext-link-type="uri" xlink:href="https://www.r-project.org/about.html">R</ext-link>).</title>
                    </caption>
                    <graphic id="gr11" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/134806/86da6507-272d-4572-b5ee-a93f96a263ce_figure11.gif"/>
                </fig>
                <p>The wavelet thresholding approach smooths over edges in the original visualization. The first factor gene captures the global spatial expression. We have a similar result for the second factor gene, as shown in 
                    <xref ref-type="fig" rid="f13">Figure 12</xref>.</p>
                <fig fig-type="figure" id="f13" orientation="portrait" position="float">
                    <label>Figure 12. </label>
                    <caption>
                        <title>The top left panel is the second factor gene; the following panels, by row, are the genes with the largest loadings on the second factor gene: genes 596, 289, 373, 712, 578 (authors' own visualization using the ggplot2 package in 
                            <ext-link ext-link-type="uri" xlink:href="https://www.r-project.org/about.html">R</ext-link>).</title>
                    </caption>
                    <graphic id="gr12" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/134806/86da6507-272d-4572-b5ee-a93f96a263ce_figure12.gif"/>
                </fig>
                <p>The second factor gene is orthogonal to the first one, capturing different spatial structures in gene expression. Different factor genes capture global and local structure, and using a wavelet decomposition provides denoised spatial expression visualization.</p>
                <p>In conclusion, we can use this dimensionality reduction technique for spatial gene selection and extraction. We select wavelet-suited genes based on the calculated gradient. Then, we can select spatially related genes based on reconstruction error via cross-validation. Alternatively, we can extract factor genes to capture spatial information and use these factor genes for visualization and further analysis.</p>
            </sec>
            <sec id="sec18">
                <title>Software</title>
                <p>We provide a small code block as a vignette showing how to generate 
                    <xref ref-type="fig" rid="f12">Figure 11</xref> and 
                    <xref ref-type="fig" rid="f13">Figure 12</xref>; see 
                    <xref ref-type="fig" rid="f1">Listing 1</xref>.</p>
                <fig fig-type="figure" id="f1" orientation="portrait" position="float">
                    <label>Listing 1. </label>
                    <caption>
                        <title>The basic wavelet-based dimensionality reduction workflow.</title>
                        <p>
                            <monospace>kOverA_ST</monospace> transforms the original spatial expression context into an image. This is processed by 
                            <monospace>waveST</monospace>. The final dimensionality reduction step is performed by 
                            <monospace>decompose</monospace>.</p>
                    </caption>
                    <graphic id="gr15" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/134806/86da6507-272d-4572-b5ee-a93f96a263ce_graphics1.gif"/>
                </fig>
                <p>We have made a new package 
                    <monospace>waveST</monospace> containing the workflow we developed. The package is available at 
                    <ext-link ext-link-type="uri" xlink:href="https://github.com/OliverXUZY/waveST">GitHub</ext-link> (see Software availability). The 
                    <monospace>kOverA_ST</monospace> function reduces data from an original (
                    <xref ref-type="bibr" rid="ref24">Weber, 2021</xref>) class input using the 
                    <monospace>kOverA</monospace> technique. Then we use the 
                    <monospace>waveST</monospace> function to construct an S4 class 
                    <monospace>waveST</monospace>, containing input generated by Algorithm 1. This object-oriented approach stores properties of the original spatial experiment and simplifies downstream calls, like decomposition and visualization. We use the 
                    <monospace>decompose</monospace> function to apply all decomposition methods. In line 6, we apply the SVD to our original data, setting the number of factors to 5. In line 11, we apply the wavelet-based reduction and apply a manual threshold with 
                    <italic toggle="yes">&#x03c4;</italic> = 40. We use 
                    <monospace>plot</monospace> and 
                    <monospace>k=1</monospace> to visualize the first factor gene and matrix slides of the genes with the largest 5 loadings on that factor.</p>
            </sec>
            <sec id="sec19">
                <title>Comparison between SVD and EBMF</title>
                <p>In this section, we provide a comparison between the reconstruction results when using SVD and EBMF. In general, we find little difference between reduction using the two methods. We first provide an element-wise comparison between the reconstruction of SVD and of EBMF with the wavelet technique in 
                    <xref ref-type="fig" rid="f14">Figure 13a</xref>.</p>
                <fig fig-type="figure" id="f14" orientation="portrait" position="float">
                    <label>Figure 13. </label>
                    <caption>
                        <title>The reconstruction error for each gene.</title>
                        <p>SVD, singular value decomposition; EBMF, Empirical Bayes Matrix Factorization.</p>
                    </caption>
                    <graphic id="gr13" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/134806/86da6507-272d-4572-b5ee-a93f96a263ce_figure13.gif"/>
                </fig>
                <p>The results are similar when applied to real data. In particular, 
                    <xref ref-type="fig" rid="f14">Figure 13b</xref> shows the per-gene reconstruction error for SVD. Our findings are similar to those for 
                    <xref ref-type="fig" rid="f10">Figure 9</xref>. Similar findings are visible when using SVD and EBMF with and without an initial wavelet decomposition (
                    <xref ref-type="fig" rid="f15">Figure 14</xref>). As we can see, SVD and Empirical Bayes Matrix Factorization (EBMF) give very similar results. This is perhaps a consequence of EBMF being initialized using the SVD.</p>
                <fig fig-type="figure" id="f15" orientation="portrait" position="float">
                    <label>Figure 14. </label>
                    <caption>
                        <title>The reconstruction error for each gene.</title>
                        <p>SVD, singular value decomposition; EBMF, Empirical Bayes Matrix Factorization.</p>
                    </caption>
                    <graphic id="gr14" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/134806/86da6507-272d-4572-b5ee-a93f96a263ce_figure14.gif"/>
                </fig>
            </sec>
        </sec>
        <sec id="sec20" sec-type="conclusions">
            <title>Conclusions</title>
            <p>We have proposed a pipeline for dimensionality reduction that respects spatial structure. Both simulations and real data experiments demonstrate that wavelet and shrinkage techniques show positive results in spatially resolved transcriptomics data. We highlight the idea of combining image processing techniques and statistical methods for application in a spatial genomics context. One future direction is splitting genes into groups suited or not for wavelet-based decomposition, and implementing decomposition with or without wavelet. Another direction is to focus input generation on only those genes that are thought to be spatially related. For genes not related to spatial information, we may perform regular decomposition on original data 
                <inline-formula>
                    <mml:math display="inline">
                        <mml:mi>Y</mml:mi>
                        <mml:mo>&#x2208;</mml:mo>
                        <mml:msup>
                            <mml:mi mathvariant="normal">&#x211d;</mml:mi>
                            <mml:mrow>
                                <mml:mi>N</mml:mi>
                                <mml:mo>&#x00d7;</mml:mo>
                                <mml:mi>P</mml:mi>
                            </mml:mrow>
                        </mml:msup>
                    </mml:math>
                </inline-formula>, abandoning the spatial information of those genes. We expect this to improve reconstruction performance. In further analysis, it is worth considering other wavelet smoothing techniques and wavelet filters. The current methods incorporate little biological information. Bringing more domain knowledge will require further techniques, but is expected to yield better results. The input generation computes local averages over even grids; however, it is possible to apply the wavelet method for irregularly spaced data (
                <xref ref-type="bibr" rid="ref27">Nason, 2008</xref>). We hope wavelet methods will be useful in adapting existing methods for statistical genomics to the spatial setting.</p>
        </sec>
        <sec id="sec21">
            <title>Data availability</title>
            <p>We ran our method on a public spatially resolved transcriptomics dataset (
                <xref ref-type="bibr" rid="ref24">Weber, 2021</xref>). The dataset can be accessed in R package 
                <ext-link ext-link-type="uri" xlink:href="https://bioconductor.org/packages/release/data/experiment/html/STexampleData.html">STexampleData</ext-link> with version 3.15. The dataset represents a single biological sample from the human brain dorsolateral prefrontal cortex (DLPFC) region, measured with the 10x Genomics Visium platform. The data used for this study can be accessed through our R package available at 
                <xref ref-type="bibr" rid="ref37">Zhuoyan (2022)</xref>. The data in Simulation is labelled 
                <monospace>simulation_data</monospace>, and the data in Real Data Experiment is labelled 
                <monospace>raws</monospace>. The data source for generating 
                <italic toggle="yes">raws</italic> can be accessed through the function 
                <monospace>Visium_humanDLPFC</monospace> in R package 
                <monospace>STexampleData</monospace> with version 3.15.</p>
            <sec id="sec22">
                <title>Underlying data</title>
                <p>Zenodo: OliverXUZY/waveST: waveST. 
                    <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.5281/zenodo.6983923">https://doi.org/10.5281/zenodo.6983923</ext-link> (
                    <xref ref-type="bibr" rid="ref37">Zhuoyan, 2022</xref>)</p>
                <p>This project contains the following underlying data:</p>
                <p>raws.rda</p>
                <p>simulation_data.rda</p>
                <p>Data are available under the terms of the 
                    <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/publicdomain/zero/1.0/">Creative Commons Zero &#x201c;No rights reserved&#x201d; data waiver</ext-link> (CC0 1.0 Public domain dedication).</p>
            </sec>
        </sec>
        <sec id="sec23">
            <title>Software availability</title>
            <p>Source code available from: 
                <ext-link ext-link-type="uri" xlink:href="https://github.com/OliverXUZY/waveST">https://github.com/OliverXUZY/waveST</ext-link>.</p>
            <p>Archived source code at time of publication: 
                <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.5281/zenodo.6983923">https://doi.org/10.5281/zenodo.6983923</ext-link>.</p>
            <p>License: The software is licensed under the 
                <ext-link ext-link-type="uri" xlink:href="https://opensource.org/licenses/MIT">MIT</ext-link> license.
            </p>
        </sec>
    </body>
    <back>
        <ack>
            <title>Acknowledgements</title>
            <p>We thank Joseph Arthur for valuable discussions.</p>
        </ack>
        <ref-list>
            <title>References</title>
            <ref id="ref15">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Abu-Jamous</surname>
                            <given-names>B</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Kelly</surname>
                            <given-names>S</given-names>
                        </name>
</person-group>:
                    <article-title>Clust: automatic extraction of optimal co-expressed gene clusters from gene expression data.</article-title>
                    <source>

                        <italic toggle="yes">Genome Biol.</italic>
</source>
                    <year>2018</year>;<volume>19</volume>(<issue>1</issue>):<fpage>1</fpage>&#x2013;<lpage>11</lpage>.</mixed-citation>
            </ref>
            <ref id="ref32">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Alter</surname>
                            <given-names>O</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Brown</surname>
                            <given-names>PO</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Botstein</surname>
                            <given-names>D</given-names>
                        </name>
</person-group>:
                    <article-title>Singular value decomposition for genome-wide expression data processing and modeling.</article-title>
                    <source>

                        <italic toggle="yes">Proc. Natl. Acad. Sci.</italic>
</source>
                    <year>2000</year>;<volume>97</volume>(<issue>18</issue>):<fpage>10101</fpage>&#x2013;<lpage>10106</lpage>.
                    <pub-id pub-id-type="pmid">10963673</pub-id>
                    <pub-id pub-id-type="doi">10.1073/pnas.97.18.10101</pub-id>
                    <pub-id pub-id-type="pmcid">PMC27718</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref9">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Asp</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Giacomello</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Larsson</surname>
                            <given-names>L</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>A spatiotemporal organ-wide gene expression and cell atlas of the developing human heart.</article-title>
                    <source>

                        <italic toggle="yes">Cell.</italic>
</source>
                    <year>2019</year>;<volume>179</volume>(<issue>7</issue>):<fpage>1647</fpage>&#x2013;<lpage>1660.e19</lpage>.
                    <pub-id pub-id-type="pmid">31835037</pub-id>
                    <pub-id pub-id-type="doi">10.1016/j.cell.2019.11.025</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref8">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Berglund</surname>
                            <given-names>E</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Maaskola</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Schultz</surname>
                            <given-names>N</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Spatial maps of prostate cancer transcriptomes reveal an unexplored landscape of heterogeneity.</article-title>
                    <source>

                        <italic toggle="yes">Nat. Commun.</italic>
</source>
                    <year>2018</year>;<volume>9</volume>(<issue>1</issue>):<fpage>1</fpage>&#x2013;<lpage>13</lpage>.
                    <pub-id pub-id-type="doi">10.1038/s41467-018-04724-5</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref34">
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Bishop</surname>
                            <given-names>CM</given-names>
                        </name>
</person-group>:
                    <article-title>Variational principal components.</article-title>
                    <year>1999</year>.</mixed-citation>
            </ref>
            <ref id="ref5">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Chen</surname>
                            <given-names>KH</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Boettiger</surname>
                            <given-names>AN</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Moffitt</surname>
                            <given-names>JR</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Spatially resolved, highly multiplexed rna profiling in single cells.</article-title>
                    <source>

                        <italic toggle="yes">Science.</italic>
</source>
                    <year>2015</year>;<volume>348</volume>(<issue>6233</issue>):<fpage>aaa6090</fpage>.
                    <pub-id pub-id-type="pmid">25858977</pub-id>
                    <pub-id pub-id-type="doi">10.1126/science.aaa6090</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref14">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Close</surname>
                            <given-names>JL</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Long</surname>
                            <given-names>BR</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Zeng</surname>
                            <given-names>H</given-names>
                        </name>
</person-group>:
                    <article-title>Spatially resolved transcriptomics in neuroscience.</article-title>
                    <source>

                        <italic toggle="yes">Nat. Methods.</italic>
</source>
                    <year>2021</year>;<volume>18</volume>(<issue>1</issue>):<fpage>23</fpage>&#x2013;<lpage>25</lpage>.
                    <pub-id pub-id-type="pmid">33408398</pub-id>
                    <pub-id pub-id-type="doi">10.1038/s41592-020-01040-z</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref26">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Cochran</surname>
                            <given-names>WT</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Cooley</surname>
                            <given-names>JW</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Favin</surname>
                            <given-names>DL</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>What is the fast fourier transform?</article-title>
                    <source>

                        <italic toggle="yes">Proc. IEEE.</italic>
</source>
                    <year>1967</year>;<volume>55</volume>(<issue>10</issue>):<fpage>1664</fpage>&#x2013;<lpage>1674</lpage>.
                    <pub-id pub-id-type="doi">10.1109/PROC.1967.5957</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref35">
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Daubechies</surname>
                            <given-names>I</given-names>
                        </name>
</person-group>:
                    <source>

                        <italic toggle="yes">Ten lectures on wavelets.</italic>
</source>
                    <publisher-name>SIAM</publisher-name>;<year>1992</year>.</mixed-citation>
            </ref>
            <ref id="ref28">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Donoho</surname>
                            <given-names>DL</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Johnstone</surname>
                            <given-names>IM</given-names>
                        </name>
</person-group>:
                    <article-title>Ideal spatial adaptation by wavelet shrinkage.</article-title>
                    <source>

                        <italic toggle="yes">Biometrika.</italic>
</source>
                    <year>1994</year>;<volume>81</volume>(<issue>3</issue>):<fpage>425</fpage>&#x2013;<lpage>455</lpage>.
                    <pub-id pub-id-type="doi">10.1093/biomet/81.3.425</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref29">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Donoho</surname>
                            <given-names>DL</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Johnstone</surname>
                            <given-names>IM</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Ideal denoising in an orthonormal basis chosen from a library of bases.</article-title>
                    <source>

                        <italic toggle="yes">Comptes rendus de l&#x2019;Acad&#x00e9;mie des sciences. S&#x00e9;rie I, Math&#x00e9;matique.</italic>
</source>
                    <year>1994</year>;<volume>319</volume>(<issue>12</issue>):<fpage>1317</fpage>&#x2013;<lpage>1322</lpage>.</mixed-citation>
            </ref>
            <ref id="ref30">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Donoho</surname>
                            <given-names>DL</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Johnstone</surname>
                            <given-names>IM</given-names>
                        </name>
</person-group>:
                    <article-title>Adapting to unknown smoothness via wavelet shrinkage.</article-title>
                    <source>

                        <italic toggle="yes">J. Am. Stat. Assoc.</italic>
</source>
                    <year>1995</year>;<volume>90</volume>(<issue>432</issue>):<fpage>1200</fpage>&#x2013;<lpage>1224</lpage>.
                    <pub-id pub-id-type="doi">10.1080/01621459.1995.10476626</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref22">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Dumitrascu</surname>
                            <given-names>B</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Villar</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Mixon</surname>
                            <given-names>DG</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Optimal marker gene selection for cell type discrimination in single cell analyses.</article-title>
                    <source>

                        <italic toggle="yes">Nat. Commun.</italic>
</source>
                    <year>2021</year>;<volume>12</volume>(<issue>1</issue>):<fpage>1</fpage>&#x2013;<lpage>8</lpage>.
                    <pub-id pub-id-type="doi">10.1038/s41467-021-21453-4</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref36">
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Gentleman</surname>
                            <given-names>R</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Carey</surname>
                            <given-names>VJ</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Huber</surname>
                            <given-names>W</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <source>

                        <italic toggle="yes">genefilter: genefilter: methods for filtering genes from high-throughput experiments.</italic>
</source>
                    <year>2021</year>. R package version 1.74.0.</mixed-citation>
            </ref>
            <ref id="ref16">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Kiselev</surname>
                            <given-names>VY</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Kirschner</surname>
                            <given-names>K</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Schaub</surname>
                            <given-names>MT</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Sc3 - consensus clustering of single-cell rna-seq data.</article-title>
                    <source>

                        <italic toggle="yes">Nat. Methods.</italic>
</source>
                    <year>2017</year>;<volume>14</volume>:<fpage>483</fpage>&#x2013;<lpage>486</lpage>.
                    <pub-id pub-id-type="pmid">28346451</pub-id>
                    <pub-id pub-id-type="doi">10.1038/nmeth.4236</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref17">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Kiselev</surname>
                            <given-names>VY</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Yiu</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Hemberg</surname>
                            <given-names>M</given-names>
                        </name>
</person-group>:
                    <article-title>scmap: projection of single-cell rna-seq data across data sets.</article-title>
                    <source>

                        <italic toggle="yes">Nat. Methods.</italic>
</source>
                    <year>2018</year>;<volume>15</volume>(<issue>5</issue>):<fpage>359</fpage>&#x2013;<lpage>362</lpage>.
                    <pub-id pub-id-type="pmid">29608555</pub-id>
                    <pub-id pub-id-type="doi">10.1038/nmeth.4644</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref6">
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Kuppe</surname>
                            <given-names>C</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ramirez Flores</surname>
                            <given-names>RO</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Li</surname>
                            <given-names>Z</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Spatial multi-omic map of human myocardial infarction.</article-title>
                    <source>

                        <italic toggle="yes">BioRxiv.</italic>
</source>
                    <year>2020</year>.</mixed-citation>
            </ref>
            <ref id="ref31">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Mallat</surname>
                            <given-names>SG</given-names>
                        </name>
</person-group>:
                    <article-title>A theory for multiresolution signal decomposition: the wavelet representation.</article-title>
                    <source>

                        <italic toggle="yes">IEEE Trans. Pattern Anal. Mach. Intell.</italic>
</source>
                    <year>1989</year>;<volume>11</volume>(<issue>7</issue>):<fpage>674</fpage>&#x2013;<lpage>693</lpage>.
                    <pub-id pub-id-type="doi">10.1109/34.192463</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref13">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Moffitt</surname>
                            <given-names>JR</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Hao</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Wang</surname>
                            <given-names>G</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>High-throughput single-cell gene-expression profiling with multiplexed error-robust fluorescence in situ hybridization.</article-title>
                    <source>

                        <italic toggle="yes">Proc. Natl. Acad. Sci.</italic>
</source>
                    <year>2016</year>;<volume>113</volume>(<issue>39</issue>):<fpage>11046</fpage>&#x2013;<lpage>11051</lpage>.
                    <pub-id pub-id-type="pmid">27625426</pub-id>
                    <pub-id pub-id-type="doi">10.1073/pnas.1612826113</pub-id>
                    <pub-id pub-id-type="pmcid">PMC5047202</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref12">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Moncada</surname>
                            <given-names>R</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Barkley</surname>
                            <given-names>D</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Wagner</surname>
                            <given-names>F</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Integrating microarray-based spatial transcriptomics and single-cell RNA-seq reveals tissue architecture in pancreatic ductal adenocarcinomas.</article-title>
                    <source>

                        <italic toggle="yes">Nat. Biotechnol.</italic>
</source>
                    <year>2020</year>;<volume>38</volume>(<issue>3</issue>):<fpage>333</fpage>&#x2013;<lpage>342</lpage>.
                    <pub-id pub-id-type="pmid">31932730</pub-id>
                    <pub-id pub-id-type="doi">10.1038/s41587-019-0392-8</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref27">
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Nason</surname>
                            <given-names>GP</given-names>
                        </name>
</person-group>:
                    <source>

                        <italic toggle="yes">Wavelet methods in statistics with R.</italic>
</source>
                    <publisher-name>Springer</publisher-name>;<year>2008</year>.</mixed-citation>
            </ref>
            <ref id="ref25">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Perperoglou</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Sauerbrei</surname>
                            <given-names>W</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Abrahamowicz</surname>
                            <given-names>M</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>A review of spline function procedures in R.</article-title>
                    <source>

                        <italic toggle="yes">BMC Med. Res. Methodol.</italic>
</source>
                    <year>2019</year>;<volume>19</volume>(<issue>1</issue>):<fpage>1</fpage>&#x2013;<lpage>16</lpage>.
                    <pub-id pub-id-type="doi">10.1186/s12874-019-0666-3</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref3">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Rodriques</surname>
                            <given-names>SG</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Stickels</surname>
                            <given-names>RR</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Goeva</surname>
                            <given-names>A</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Slide-seq: A scalable technology for measuring genome-wide expression at high spatial resolution.</article-title>
                    <source>

                        <italic toggle="yes">Science.</italic>
</source>
                    <year>2019</year>;<volume>363</volume>(<issue>6434</issue>):<fpage>1463</fpage>&#x2013;<lpage>1467</lpage>.
                    <pub-id pub-id-type="pmid">30923225</pub-id>
                    <pub-id pub-id-type="doi">10.1126/science.aaw1219</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref10">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>R&#x00f6;delsperger</surname>
                            <given-names>C</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ebbing</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Sharma</surname>
                            <given-names>DR</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Spatial transcriptomics of nematodes identifies sperm cells as a source of genomic novelty and rapid evolution.</article-title>
                    <source>

                        <italic toggle="yes">Mol. Biol. Evol.</italic>
</source>
                    <year>2021</year>;<volume>38</volume>(<issue>1</issue>):<fpage>229</fpage>&#x2013;<lpage>243</lpage>.
                    <pub-id pub-id-type="pmid">32785688</pub-id>
                    <pub-id pub-id-type="doi">10.1093/molbev/msaa207</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref7">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Shah</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Lubeck</surname>
                            <given-names>E</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Zhou</surname>
                            <given-names>W</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>In situ transcription profiling of single cells reveals spatial organization of cells in the mouse hippocampus.</article-title>
                    <source>

                        <italic toggle="yes">Neuron.</italic>
</source>
                    <year>2016</year>;<volume>92</volume>(<issue>2</issue>):<fpage>342</fpage>&#x2013;<lpage>357</lpage>.
                    <pub-id pub-id-type="pmid">27764670</pub-id>
                    <pub-id pub-id-type="doi">10.1016/j.neuron.2016.10.001</pub-id>
                    <pub-id pub-id-type="pmcid">PMC5087994</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref19">
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Shang</surname>
                            <given-names>L</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Zhou</surname>
                            <given-names>X</given-names>
                        </name>
</person-group>:
                    <article-title>Spatially aware dimension reduction for spatial transcriptomics.</article-title>
                    <source>

                        <italic toggle="yes">bioRxiv.</italic>
</source>
                    <year>2022</year>.</mixed-citation>
            </ref>
            <ref id="ref4">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Srivatsan</surname>
                            <given-names>SR</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Regier</surname>
                            <given-names>MC</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Barkan</surname>
                            <given-names>E</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Embryo-scale, single-cell spatial transcriptomics.</article-title>
                    <source>

                        <italic toggle="yes">Science.</italic>
</source>
                    <year>2021</year>;<volume>373</volume>(<issue>6550</issue>):<fpage>111</fpage>&#x2013;<lpage>117</lpage>.
                    <pub-id pub-id-type="pmid">34210887</pub-id>
                    <pub-id pub-id-type="doi">10.1126/science.abb9536</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref1">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>St&#x00e5;hl</surname>
                            <given-names>PL</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Salm&#x00e9;n</surname>
                            <given-names>F</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Vickovic</surname>
                            <given-names>S</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Visualization and analysis of gene expression in tissue sections by spatial transcriptomics.</article-title>
                    <source>

                        <italic toggle="yes">Science.</italic>
</source>
                    <year>2016</year>;<volume>353</volume>(<issue>6294</issue>):<fpage>78</fpage>&#x2013;<lpage>82</lpage>.
                    <pub-id pub-id-type="pmid">27365449</pub-id>
                    <pub-id pub-id-type="doi">10.1126/science.aaf2403</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref23">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Svensson</surname>
                            <given-names>V</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Teichmann</surname>
                            <given-names>SA</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Stegle</surname>
                            <given-names>O</given-names>
                        </name>
</person-group>:
                    <article-title>SpatialDE: identification of spatially variable genes.</article-title>
                    <source>

                        <italic toggle="yes">Nat. Methods.</italic>
</source>
                    <year>2018</year>;<volume>15</volume>(<issue>5</issue>):<fpage>343</fpage>&#x2013;<lpage>346</lpage>.
                    <pub-id pub-id-type="pmid">29553579</pub-id>
                    <pub-id pub-id-type="doi">10.1038/nmeth.4636</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref11">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Thrane</surname>
                            <given-names>K</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Eriksson</surname>
                            <given-names>H</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Maaskola</surname>
                            <given-names>J</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Spatially resolved transcriptomics enables dissection of genetic heterogeneity in stage III cutaneous malignant melanoma.</article-title>
                    <source>

                        <italic toggle="yes">Cancer Res.</italic>
</source>
                    <year>2018</year>;<volume>78</volume>(<issue>20</issue>):<fpage>5970</fpage>&#x2013;<lpage>5979</lpage>.
                    <pub-id pub-id-type="pmid">30154148</pub-id>
                    <pub-id pub-id-type="doi">10.1158/0008-5472.CAN-18-0747</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref21">
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Townes</surname>
                            <given-names>FW</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Engelhardt</surname>
                            <given-names>BE</given-names>
                        </name>
</person-group>:
                    <article-title>Nonnegative spatial factorization.</article-title>
                    <source>

                        <italic toggle="yes">arXiv preprint arXiv:2110.06122.</italic>
</source>
                    <year>2021</year>.</mixed-citation>
            </ref>
            <ref id="ref24">
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Weber</surname>
                            <given-names>LM</given-names>
                        </name>
</person-group>:
                    <source>

                        <italic toggle="yes">STexampleData: Collection of spatially resolved transcriptomics datasets in SpatialExperiment Bioconductor format.</italic>
</source>
                    <year>2021</year>. R package version 1.0.8.
                    <ext-link ext-link-type="uri" xlink:href="https://github.com/lmweber/STexampleData">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref20">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Velten</surname>
                            <given-names>B</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Braunger</surname>
                            <given-names>JM</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Argelaguet</surname>
                            <given-names>R</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Identifying temporal and spatial patterns of variation from multimodal data using MEFISTO.</article-title>
                    <source>

                        <italic toggle="yes">Nat. Methods.</italic>
</source>
                    <year>2022</year>;<volume>19</volume>:<fpage>179</fpage>&#x2013;<lpage>186</lpage>.
                    <pub-id pub-id-type="doi">10.1038/s41592-021-01343-9</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref33">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Wang</surname>
                            <given-names>W</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Stephens</surname>
                            <given-names>M</given-names>
                        </name>
</person-group>:
                    <article-title>Empirical Bayes matrix factorization.</article-title>
                    <source>

                        <italic toggle="yes">J. Mach. Learn. Res.</italic>
</source>
                    <year>2021</year>;<volume>22</volume>(<issue>120</issue>):<fpage>1</fpage>&#x2013;<lpage>40</lpage>.</mixed-citation>
            </ref>
            <ref id="ref2">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Xia</surname>
                            <given-names>C</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Fan</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Emanuel</surname>
                            <given-names>G</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Spatial transcriptome profiling by MERFISH reveals subcellular RNA compartmentalization and cell cycle-dependent gene expression.</article-title>
                    <source>

                        <italic toggle="yes">Proc. Natl. Acad. Sci.</italic>
</source>
                    <year>2019</year>;<volume>116</volume>(<issue>39</issue>):<fpage>19490</fpage>&#x2013;<lpage>19499</lpage>.
                    <pub-id pub-id-type="pmid">31501331</pub-id>
                    <pub-id pub-id-type="doi">10.1073/pnas.1912459116</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref18">
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Zhu</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Sabatti</surname>
                            <given-names>C</given-names>
                        </name>
</person-group>:
                    <article-title>Integrative spatial single-cell analysis with graph-based feature learning.</article-title>
                    <source>

                        <italic toggle="yes">bioRxiv.</italic>
</source>
                    <year>2020</year>.</mixed-citation>
            </ref>
            <ref id="ref37">
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Xu</surname>
                            <given-names>Z</given-names>
                        </name>
</person-group>:
                    <article-title>OliverXUZY/waveST: waveST (v1.1.0). Zenodo. [Source code].</article-title>
                    <year>2022</year>.
                    <pub-id pub-id-type="doi">10.5281/zenodo.6983923</pub-id>
                </mixed-citation>
            </ref>
        </ref-list>
    </back>
    <sub-article article-type="reviewer-report" id="report161831">
        <front-stub>
            <article-id pub-id-type="doi">10.5256/f1000research.134806.r161831</article-id>
            <title-group>
                <article-title>Reviewer response for version 1</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author">
                    <name>
                        <surname>Zhang</surname>
                        <given-names>Shixiong</given-names>
                    </name>
                    <xref ref-type="aff" rid="r161831a1">1</xref>
                    <role>Referee</role>
                    <uri content-type="orcid">https://orcid.org/0000-0002-0314-9199</uri>
                </contrib>
                <aff id="r161831a1">
                    <label>1</label>Department of Computer Science, University of Hong Kong, Hong Kong SAR, Hong Kong</aff>
            </contrib-group>
            <author-notes>
                <fn fn-type="conflict">
                    <p>
                        <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>20</day>
                <month>9</month>
                <year>2024</year>
            </pub-date>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2024 Zhang S</copyright-statement>
                <copyright-year>2024</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access peer review report distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <related-article ext-link-type="doi" id="relatedArticleReport161831" related-article-type="peer-reviewed-article" xlink:href="10.12688/f1000research.122775.1"/>
            <custom-meta-group>
                <custom-meta>
                    <meta-name>recommendation</meta-name>
                    <meta-value>reject</meta-value>
                </custom-meta>
            </custom-meta-group>
        </front-stub>
        <body>
            <p>The authors proposed a method for spatial transcriptomics dimensional reduction by using wavelet transformation and matrix factorization. I should appreciate the authors' time and patience to come up with some results. However, there are several problems that detract from the quality of this manuscript. Below are several comments on this work. 
                <list list-type="order">
                    <list-item>
                        <p>You may review and comment on state-of-the-art spatial transcriptomics data analysis methods.</p>
                    </list-item>
                    <list-item>
                        <p>You should investigate the performance of your method on downstream analysis.</p>
                    </list-item>
                    <list-item>
                        <p>Please compare your proposed method with state-of-the-art methods.</p>
                    </list-item>
                    <list-item>
                        <p>Figure 1 could benefit from enhancements, as it currently lacks clarity and informative content.</p>
                    </list-item>
                    <list-item>
                        <p>The authors should proofread the English writing to improve the study.</p>
                    </list-item>
                    <list-item>
                        <p>In Conclusion, there was no mention of the limitations of the study.</p>
                    </list-item>
                </list>
            </p>
            <p>Is the rationale for developing the new method (or application) clearly explained?</p>
            <p>Yes</p>
            <p>Is the description of the method technically sound?</p>
            <p>Partly</p>
            <p>Are the conclusions about the method and its performance adequately supported by the findings presented in the article?</p>
            <p>Partly</p>
            <p>If any results are presented, are all the source data underlying the results available to ensure full reproducibility?</p>
            <p>Yes</p>
            <p>Are sufficient details provided to allow replication of the method development and its use by others?</p>
            <p>Yes</p>
            <p>Reviewer Expertise:</p>
            <p>Single-cell multiomics analysis</p>
            <p>I confirm that I have read this submission and believe that I have an appropriate level of expertise to state that I do not consider it to be of an acceptable scientific standard, for reasons outlined above.</p>
        </body>
    </sub-article>
    <sub-article article-type="reviewer-report" id="report161830">
        <front-stub>
            <article-id pub-id-type="doi">10.5256/f1000research.134806.r161830</article-id>
            <title-group>
                <article-title>Reviewer response for version 1</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author">
                    <name>
                        <surname>Le</surname>
                        <given-names>Nguyen Quoc Khanh</given-names>
                    </name>
                    <xref ref-type="aff" rid="r161830a1">1</xref>
                    <role>Referee</role>
                    <uri content-type="orcid">https://orcid.org/0000-0003-4896-7926</uri>
                </contrib>
                <aff id="r161830a1">
                    <label>1</label>Taipei Medical University, Taipei, Taiwan</aff>
            </contrib-group>
            <author-notes>
                <fn fn-type="conflict">
                    <p>
                        <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>14</day>
                <month>9</month>
                <year>2024</year>
            </pub-date>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2024 Le NQK</copyright-statement>
                <copyright-year>2024</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access peer review report distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <related-article ext-link-type="doi" id="relatedArticleReport161830" related-article-type="peer-reviewed-article" xlink:href="10.12688/f1000research.122775.1"/>
            <custom-meta-group>
                <custom-meta>
                    <meta-name>recommendation</meta-name>
                    <meta-value>approve-with-reservations</meta-value>
                </custom-meta>
            </custom-meta-group>
        </front-stub>
        <body>
            <p>In this study, the authors proposed a spatial transcriptomics dimensionality reduction method using wavelet bases. The performance looks promising, however, some major points should be addressed as follows: 
                <list list-type="order">
                    <list-item>
                        <p>Uncertainties of models should be reported.</p>
                    </list-item>
                    <list-item>
                        <p>More benchmark comparisons should be conducted and analyzed.</p>
                    </list-item>
                    <list-item>
                        <p>The authors only listed some results without in-depth discussions. Also, they must provide more discussions on biological/clinical insights of models.</p>
                    </list-item>
                    <list-item>
                        <p>It is unclear on the model implementation part. Thus, the authors should improve this part.</p>
                    </list-item>
                </list>
            </p>
            <p>Is the rationale for developing the new method (or application) clearly explained?</p>
            <p>Yes</p>
            <p>Is the description of the method technically sound?</p>
            <p>Yes</p>
            <p>Are the conclusions about the method and its performance adequately supported by the findings presented in the article?</p>
            <p>Yes</p>
            <p>If any results are presented, are all the source data underlying the results available to ensure full reproducibility?</p>
            <p>Partly</p>
            <p>Are sufficient details provided to allow replication of the method development and its use by others?</p>
            <p>Yes</p>
            <p>Reviewer Expertise:</p>
            <p>bioinformatics; genomics analysis; artificial intelligence</p>
            <p>I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard, however I have significant reservations, as outlined above.</p>
        </body>
    </sub-article>
    <sub-article article-type="reviewer-report" id="report161836">
        <front-stub>
            <article-id pub-id-type="doi">10.5256/f1000research.134806.r161836</article-id>
            <title-group>
                <article-title>Reviewer response for version 1</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author">
                    <name>
                        <surname>Rendeiro</surname>
                        <given-names>Andr&#x00e9; F</given-names>
                    </name>
                    <xref ref-type="aff" rid="r161836a1">1</xref>
                    <role>Referee</role>
                    <uri content-type="orcid">https://orcid.org/0000-0001-9362-5373</uri>
                </contrib>
                <aff id="r161836a1">
                    <label>1</label>CeMM Research Center for Molecular Medicine of the Austrian Academy of Sciences, Vienna, Austria</aff>
            </contrib-group>
            <author-notes>
                <fn fn-type="conflict">
                    <p>
                        <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>23</day>
                <month>2</month>
                <year>2023</year>
            </pub-date>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2023 Rendeiro AF</copyright-statement>
                <copyright-year>2023</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access peer review report distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <related-article ext-link-type="doi" id="relatedArticleReport161836" related-article-type="peer-reviewed-article" xlink:href="10.12688/f1000research.122775.1"/>
            <custom-meta-group>
                <custom-meta>
                    <meta-name>recommendation</meta-name>
                    <meta-value>approve-with-reservations</meta-value>
                </custom-meta>
            </custom-meta-group>
        </front-stub>
        <body>
            <p>The work by Xu and Sankaran develops a novel method for the analysis of spatial transcriptomics data that leverages wavelet transforms followed by dimensionality reduction. The authors put considerable effort into formulating and evaluating the theoretical basis for the development of their method but, in my opinion, fail to demonstrate its performance and utility. This is in large part due to the sole use of a simulated dataset and a single sample of real data, as well as the lack of direct comparison of their method to other established approaches. The software made available could be improved for general use and the work overall could use a major overhaul to improve clarity and presentation.</p>
            <p> </p>
            <p> The need for computational methods for the analysis of spatial data is well introduced, but the claims made about existing methods for this task lack substantiation. The authors mention that in other methods "
                <italic>the complex model structure and a large number of hyperparameters introduce uncertainty and noise</italic>." The authors should provide evidence that this is the case and perform a direct comparison of their methods to established ones such as SpatialLDA, SpatialPCA, MEFISTO, NSF, some of which are mentioned by the authors in the Introduction.</p>
            <p> </p>
            <p> The proposed method is applied to a dataset of one sample only with little justification given as to why this was chosen. The authors should benchmark their method on much larger sample sizes and in particular across different datasets to ensure that their method is robust to technical variation and confounders, as well as broadly applicable across biological contexts. In fact, this could help the authors both in illustrating how well their method performs and in providing biological interpretability, if their method is used in datasets which are either manually annotated or have known microanatomical domains, as is the case for the human brain. In that context, the latent factors inferred by the authors could be used to segment parts of tissue corresponding to functional domains of microanatomy.</p>
            <p> </p>
            <p> The software provided comes with no documentation on its installation and on the steps to reproduce the results. Furthermore, I would encourage the authors to share datasets in a manner agnostic to programming languages (e.g. with Parquet, H5, or CSV files) and to provide for example a Dockerfile to ensure reproducibility and explicit specification of software requirements.</p>
            <p>Is the rationale for developing the new method (or application) clearly explained?</p>
            <p>Yes</p>
            <p>Is the description of the method technically sound?</p>
            <p>Yes</p>
            <p>Are the conclusions about the method and its performance adequately supported by the findings presented in the article?</p>
            <p>No</p>
            <p>If any results are presented, are all the source data underlying the results available to ensure full reproducibility?</p>
            <p>Yes</p>
            <p>Are sufficient details provided to allow replication of the method development and its use by others?</p>
            <p>No</p>
            <p>Reviewer Expertise:</p>
            <p>Computational Biology, Single-cell technologies, Spatial-omics, Multiplexed imaging</p>
            <p>I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard, however I have significant reservations, as outlined above.</p>
        </body>
    </sub-article>
    <sub-article article-type="reviewer-report" id="report161832">
        <front-stub>
            <article-id pub-id-type="doi">10.5256/f1000research.134806.r161832</article-id>
            <title-group>
                <article-title>Reviewer response for version 1</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author">
                    <name>
                        <surname>Fulcher</surname>
                        <given-names>Ben</given-names>
                    </name>
                    <xref ref-type="aff" rid="r161832a1">1</xref>
                    <role>Referee</role>
                    <uri content-type="orcid">https://orcid.org/0000-0002-3003-4055</uri>
                </contrib>
                <aff id="r161832a1">
                    <label>1</label>School of Physics, The University of Sydney, Camperdown, NSW, Australia</aff>
            </contrib-group>
            <author-notes>
                <fn fn-type="conflict">
                    <p>
                        <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>14</day>
                <month>2</month>
                <year>2023</year>
            </pub-date>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2023 Fulcher B</copyright-statement>
                <copyright-year>2023</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access peer review report distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <related-article ext-link-type="doi" id="relatedArticleReport161832" related-article-type="peer-reviewed-article" xlink:href="10.12688/f1000research.122775.1"/>
            <custom-meta-group>
                <custom-meta>
                    <meta-name>recommendation</meta-name>
                    <meta-value>approve-with-reservations</meta-value>
                </custom-meta>
            </custom-meta-group>
        </front-stub>
        <body>
            <p>
                <bold>SUMMARY:</bold> The paper implements and tests a wavelet transform method (combined with dimensionality reduction) to detect and visualize interesting spatial patterns of variation in a 2D spatial transcriptomics dataset. The method is tested on one simulated and one real dataset. Some advantages are found relative to SVD (which does not incorporate information about the spatial embedding). I commend the availability of the software, but the paper could be improved in its presentation and clarity of both text and figures, which could be given much more care, and present concepts with more nuance. The need for this method could also be more clearly articulated and, correspondingly, its expected impact on practice in the field.</p>
            <p> </p>
            <p> 
                <bold>Writing quality:</bold> Many sentences of the main body of the text contain some error; text should be checked for grammar and readability. This does not affect the ability of the Introduction to be followed, but clarity of communication is essential when motivating and describing a new method. (One example in Box for "
                <italic>Alogorithm</italic> [sic] 
                <italic>1</italic>": "
                <italic>...compute the coordinates of vertices of big rectangle map B cover all N samples spatially</italic>")</p>
            <p> </p>
            <p> 
                <bold>Reproducibility (Code):</bold> Provided as a GitHub repository.</p>
            <p> </p>
            <p> 
                <bold>Reproducibility (Data):</bold> Public data available from R package.</p>
            <p> </p>
            <p> 
                <bold>COMMENTS:</bold> 
                <list list-type="bullet">
                    <list-item>
                        <p>I suppose the authors assume that all genes are measured on a comparable scale? Due to differential sensitivity in the measurement process, for the same quantity of a gene transcript some genes may obtain a systematically higher reading than others. Given the thresholding to remove 'low-expression' genes, was some consideration given to an appropriate normalization?</p>
                    </list-item>
                    <list-item>
                        <p>Is it a major limitation that the method is only applicable to data with reasonable density across a full 2D grid (required by the partition Algorithm 1)? Many systems, e.g. neural systems, are not of rectangular geometry and are sometimes reconstructed to 3D volumes. Could your method be adapted/extended to non-2D geometries? May be a point for discussion.</p>
                    </list-item>
                    <list-item>
                        <p>I wonder whether a comparison to a 2D Fourier basis set was attempted? The authors mention the advantages of the spatial localization of DWT bases relative to extended Fourier modes, but do not directly test the comparison in their experiments.</p>
                    </list-item>
                    <list-item>
                        <p>It could be clearer throughout (including in the abstract) why this method is needed and what impact the method may have on practice in the field.</p>
                    </list-item>
                </list> 
                <bold>MINOR:</bold> 
                <list list-type="bullet">
                    <list-item>
                        <p>"
                            <italic>Spatial resolved transcriptomics (ST)</italic>" in line 1 of main text - should be "
                            <italic>spatially resolved</italic>"? ST is defined in the latter (grammatical) form in the Abstract.</p>
                    </list-item>
                    <list-item>
                        <p>"
                            <italic>helped to answer fundamental questions</italic>&#x2026;" - could give some sense of what sorts of precise questions you mean.</p>
                    </list-item>
                    <list-item>
                        <p>Notation of expectation operator: usual to have parentheses as E(\sigma^2).</p>
                    </list-item>
                    <list-item>
                        <p>Rephrase: "
                            <italic>We have a signal or frequency to estimate.</italic>"</p>
                    </list-item>
                    <list-item>
                        <p>I think only "
                            <italic>Haar</italic>" should be italicized, not "
                            <italic>Haar mother</italic>".</p>
                    </list-item>
                    <list-item>
                        <p>"
                            <italic>wavelets oscillate and decay fast</italic>" - confusing to use a temporal descriptor like "
                            <italic>fast</italic>" in the context of describing a spatial pattern.</p>
                    </list-item>
                    <list-item>
                        <p>Imprecise: "
                            <italic>every basis element will interact with this discontinuity</italic>"</p>
                    </list-item>
                    <list-item>
                        <p>"
                            <italic>The simplest discrete wavelet transformation calculates the difference and sums between each adjacent pair.</italic>" - clarify.</p>
                    </list-item>
                    <list-item>
                        <p>Perhaps the conventional DWT process does not need to be described from scratch - authors may consider instead citing a basic text on the discrete wavelet transform, focusing the paper on the new contributions.</p>
                    </list-item>
                    <list-item>
                        <p>"
                            <italic>However, finer scale sometimes introduces more parameters to capture minor details of the sequence (overfitting).</italic>" - rephrase out the imprecise "
                            <italic>sometimes</italic>" and better explain the overfitting potential.</p>
                    </list-item>
                    <list-item>
                        <p>Rephrase: "
                            <italic>The factor genes capture the mutual underlying information of genes.</italic>"</p>
                    </list-item>
                    <list-item>
                        <p>Authors may consider more precision in, "
                            <italic>We aim to find a latent gene space that respects spatial structure.</italic>" (e.g., what aspects of spatial structure?).</p>
                    </list-item>
                    <list-item>
                        <p>Fig 2 is missing some basics of a scientific plot: units on axes, descriptions of plot elements (e.g., color is "
                            <italic>gene</italic>" but I assume it is expression with some units?). Similar criticisms apply to the other figures. E.g., Fig. 4 does not have axes nor color scale labeled in either the figure or caption.</p>
                    </list-item>
                    <list-item>
                        <p>"
                            <italic>respects spatial structure</italic>" - this phrase needs to be more precisely described - what it means to 'respect spatial structure' is key to motivating the method. I suppose it is in contrast to PCA/SVD or other statistical dimensionality-reduction methods that are blind to the spatial embedding, but it could be made more explicit and precise.</p>
                    </list-item>
                </list> </p>
            <p> 
                <bold>OPTIONAL (no response necessary):</bold> 
                <list list-type="bullet">
                    <list-item>
                        <p>I wonder if the authors considered putting their package on CRAN?</p>
                    </list-item>
                    <list-item>
                        <p>I wonder whether this paper is relevant?:&#x00a0;Righelli&#x00a0;
                            <italic>et al.&#x00a0;</italic>(2022)
                            <sup>
                                <xref ref-type="bibr" rid="rep-ref-161832-1">1</xref>
                            </sup> Or this one?:&#x00a0;Ghazanfar&#x00a0;
                            <italic>et al.&#x00a0;</italic>(2020)
                            <sup>
                                <xref ref-type="bibr" rid="rep-ref-161832-2">2</xref>
                            </sup>
                        </p>
                    </list-item>
                </list>
            </p>
            <p>Is the rationale for developing the new method (or application) clearly explained?</p>
            <p>Yes</p>
            <p>Is the description of the method technically sound?</p>
            <p>Partly</p>
            <p>Are the conclusions about the method and its performance adequately supported by the findings presented in the article?</p>
            <p>Yes</p>
            <p>If any results are presented, are all the source data underlying the results available to ensure full reproducibility?</p>
            <p>Yes</p>
            <p>Are sufficient details provided to allow replication of the method development and its use by others?</p>
            <p>Yes</p>
            <p>Reviewer Expertise:</p>
            <p>Analysis of gene transcriptomics data</p>
            <p>I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard, however I have significant reservations, as outlined above.</p>
        </body>
        <back>
            <ref-list>
                <title>References</title>
                <ref id="rep-ref-161832-1">
                    <label>1</label>
                    <mixed-citation publication-type="journal">
                        <person-group person-group-type="author"><name><surname>Righelli</surname></name><etal/></person-group>:
                        <article-title>SpatialExperiment: infrastructure for spatially-resolved&#x00a0;transcriptomics data in R using Bioconductor.</article-title>
                        <source>
                            <italic>Bioinformatics</italic>
                        </source>.<year>2022</year>;<volume>38</volume>(<issue>11</issue>) :
                        <elocation-id>10.1093/bioinformatics/btac299</elocation-id>
                        <fpage>3128</fpage>-<lpage>3131</lpage>
                        <pub-id pub-id-type="pmid">35482478</pub-id>
                        <pub-id pub-id-type="doi">10.1093/bioinformatics/btac299</pub-id>
                    </mixed-citation>
                </ref>
                <ref id="rep-ref-161832-2">
                    <label>2</label>
                    <mixed-citation publication-type="journal">
                        <person-group person-group-type="author"><name><surname>Ghazanfar</surname></name><etal/></person-group>:
                        <article-title>Investigating higher-order interactions in single-cell data with scHOT.</article-title>
                        <source>
                            <italic>Nat Methods</italic>
                        </source>.<year>2020</year>;<volume>17</volume>(<issue>8</issue>) :
                        <elocation-id>10.1038/s41592-020-0885-x</elocation-id>
                        <fpage>799</fpage>-<lpage>806</lpage>
                        <pub-id pub-id-type="pmid">32661426</pub-id>
                        <pub-id pub-id-type="doi">10.1038/s41592-020-0885-x</pub-id>
                    </mixed-citation>
                </ref>
            </ref-list>
        </back>
    </sub-article>
</article>
