<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.2 20190208//EN" "http://jats.nlm.nih.gov/publishing/1.2/JATS-journalpublishing1.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="1.2" xml:lang="en">
    <front>
        <journal-meta>
            <journal-id journal-id-type="pmc">F1000Research</journal-id>
            <journal-title-group>
                <journal-title>F1000Research</journal-title>
            </journal-title-group>
            <issn pub-type="epub">2046-1402</issn>
            <publisher>
                <publisher-name>F1000 Research Limited</publisher-name>
                <publisher-loc>London, UK</publisher-loc>
            </publisher>
        </journal-meta>
        <article-meta>
            <article-id pub-id-type="doi">10.12688/f1000research.166682.3</article-id>
            <article-categories>
                <subj-group subj-group-type="heading">
                    <subject>Research Article</subject>
                </subj-group>
                <subj-group>
                    <subject>Articles</subject>
                </subj-group>
            </article-categories>
            <title-group>
                <article-title>An Improved Deep Learning Algorithm for Breast Cancer Survival Prediction Based on Multi-Omics Data</article-title>
                <fn-group content-type="pub-status">
                    <fn>
                        <p>[version 3; peer review: 3 approved]</p>
                    </fn>
                </fn-group>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Nasarudin</surname>
                        <given-names>Nurul Athirah</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Conceptualization</role>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Original Draft Preparation</role>
                    <uri content-type="orcid">https://orcid.org/0000-0002-7684-4184</uri>
                    <xref ref-type="aff" rid="a1">1</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Al-Jasmi</surname>
                        <given-names>Fatma</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Formal Analysis</role>
                    <role content-type="http://credit.niso.org/">Validation</role>
                    <xref ref-type="aff" rid="a1">1</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Abdul Aziz</surname>
                        <given-names>Nor Hidayati</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Conceptualization</role>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <uri content-type="orcid">https://orcid.org/0000-0001-7995-4912</uri>
                    <xref ref-type="aff" rid="a2">2</xref>
                    <xref ref-type="aff" rid="a3">3</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Ab Aziz</surname>
                        <given-names>Nor Azlina</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Conceptualization</role>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <uri content-type="orcid">https://orcid.org/0000-0002-2119-6191</uri>
                    <xref ref-type="aff" rid="a2">2</xref>
                    <xref ref-type="aff" rid="a3">3</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Khan</surname>
                        <given-names>Wasif</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <role content-type="http://credit.niso.org/">Software</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a4">4</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Hendrawan</surname>
                        <given-names>Yusuf</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <xref ref-type="aff" rid="a5">5</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Al Riza</surname>
                        <given-names>Dimas Firmanda</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <xref ref-type="aff" rid="a5">5</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Manzoor</surname>
                        <given-names>Ayisha</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Formal Analysis</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a1">1</xref>
                </contrib>
                <contrib contrib-type="author" corresp="yes">
                    <name>
                        <surname>Ali</surname>
                        <given-names>Bassam R.</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Project Administration</role>
                    <role content-type="http://credit.niso.org/">Supervision</role>
                    <uri content-type="orcid">https://orcid.org/0000-0003-1306-6618</uri>
                    <xref ref-type="corresp" rid="c1">a</xref>
                    <xref ref-type="aff" rid="a1">1</xref>
                </contrib>
                <contrib contrib-type="author" corresp="yes">
                    <name>
                        <surname>Mohamad</surname>
                        <given-names>Mohd Saberi</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Project Administration</role>
                    <role content-type="http://credit.niso.org/">Supervision</role>
                    <uri content-type="orcid">https://orcid.org/0000-0002-1079-4559</uri>
                    <xref ref-type="corresp" rid="c2">b</xref>
                    <xref ref-type="aff" rid="a1">1</xref>
                    <xref ref-type="aff" rid="a2">2</xref>
                    <xref ref-type="aff" rid="a3">3</xref>
                    <xref ref-type="aff" rid="a5">5</xref>
                </contrib>
                <aff id="a1">
                    <label>1</label>Department of Genetics and Genomics, College of Medicine and Health Sciences, United Arab Emirates University, Al Ain, Abu Dhabi, 17666, United Arab Emirates</aff>
                <aff id="a2">
                    <label>2</label>Centre for Advanced Analytics, CoE for Artificial Intelligence, Multimedia University, Malacca, Malacca, 75450, Malaysia</aff>
                <aff id="a3">
                    <label>3</label>Faculty of Engineering &amp; Technology, Multimedia University, Malacca, Malacca, 75450, Malaysia</aff>
                <aff id="a4">
                    <label>4</label>J. Crayton Pruitt Family Department of Biomedical Engineering, Herbert Wertheim College of Engineering, University of Florida, Gainesville, Florida, 32611, USA</aff>
                <aff id="a5">
                    <label>5</label>Department of Biosystems Engineering, Faculty of Agricultural Technology, Universitas Brawijaya, Malang, East Java, 65145, Indonesia</aff>
            </contrib-group>
            <author-notes>
                <corresp id="c1">
                    <label>a</label>
                    <email xlink:href="mailto:bassam.ali@uaeu.ac.ae">bassam.ali@uaeu.ac.ae</email>
                </corresp>
                <corresp id="c2">
                    <label>b</label>
                    <email xlink:href="mailto:mohd.saberi@gmail.com">mohd.saberi@gmail.com</email>
                </corresp>
                <fn fn-type="conflict">
                    <p>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>10</day>
                <month>4</month>
                <year>2026</year>
            </pub-date>
            <pub-date pub-type="collection">
                <year>2025</year>
            </pub-date>
            <volume>14</volume>
            <elocation-id>765</elocation-id>
            <history>
                <date date-type="accepted">
                    <day>12</day>
                    <month>3</month>
                    <year>2026</year>
                </date>
            </history>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2026 Nasarudin NA et al.</copyright-statement>
                <copyright-year>2026</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access article distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <self-uri content-type="pdf" xlink:href="https://f1000research.com/articles/14-765/pdf"/>
            <abstract>
                <sec>
                    <title>Background</title>
                    <p>Breast cancer is a leading cause of mortality among women worldwide. Accurate survival prediction can improve clinical decision-making and support personalized treatment planning. This study aims to develop an interpretable and effective deep learning model for breast cancer survival prediction using multi-omics data.</p>
                </sec>
                <sec>
                    <title>Methods</title>
                    <p>This study proposes a novel deep learning model combining Bi-directional Long Short-Term Memory (BiLSTM) and Convolutional Neural Network (CNN) architectures, integrated with Minimum Redundancy Maximum Relevance (MRMR) feature selection. The model was evaluated on two large datasets: METABRIC (n=1980) and TCGA-BRCA (n=1080), using clinical, copy number alteration (CNA), and gene expression data. Performance was assessed through metrics such as AUC-ROC and accuracy.</p>
                </sec>
                <sec>
                    <title>Results</title>
                    <p>The proposed model demonstrated superior performance compared to existing algorithms, achieving high AUC-ROC and accuracy values across all data modalities. The integration of BiLSTM and CNN architectures allowed the model to capture temporal and spatial patterns, improving prediction robustness. Notably, the model achieved an accuracy of 98% on the METABRIC dataset and 96% on the TCGA dataset.</p>
                </sec>
                <sec>
                    <title>Conclusions</title>
                    <p>The combination of BiLSTM, CNN, and MRMR offers an interpretable and accurate framework for breast cancer survival prediction using multi-omics data. This approach provides actionable insights for clinicians and highlights its potential for broader applications in oncology.</p>
                </sec>
            </abstract>
            <kwd-group kwd-group-type="author">
                <kwd>Artificial Intelligence</kwd>
                <kwd>BiLSTM</kwd>
                <kwd>Breast Cancer</kwd>
                <kwd>CNN</kwd>
                <kwd>Deep Learning</kwd>
                <kwd>Multi-omics</kwd>
            </kwd-group>
            <funding-group>
                <award-group id="fund-1" xlink:href="https://doi.org/10.13039/100012024">
                    <funding-source>Multimedia University</funding-source>
                    <award-id>MMUE/240128</award-id>
                </award-group>
                <award-group id="fund-2" xlink:href="https://doi.org/10.13039/100024160">
                    <funding-source>ASPIRE&#x2014;the technology programme management pillar of Abu Dhabi&#x2019;s Advanced Technology Research Council (ATRC)</funding-source>
                    <award-id>VRI-20-10</award-id>
                </award-group>
                <award-group id="fund-3" xlink:href="https://doi.org/10.13039/501100006013">
                    <funding-source>United Arab Emirates University</funding-source>
                    <award-id>12R111</award-id>
                </award-group>
                <funding-statement>This work was supported by the United Arab Emirates University through the Strategic Research Program (Grant No. 12R111) and the Fisabilillah Research Development Grant Scheme (Grant No. MMUE/240128) awarded by Multimedia University. Additional support was provided by ASPIRE&#x2014;the technology programme management pillar of Abu Dhabi&#x2019;s Advanced Technology Research Council (ATRC)&#x2014;through the ASPIRE Precision Medicine Research Institute Abu Dhabi (ASPIRE-PMRIAD) under award number VRI-20-10.</funding-statement>
            </funding-group>
        </article-meta>
        <notes>
            <sec sec-type="version-changes">
                <label>Revised</label>
                <title>Amendments from Version 2</title>
                <p>The revised version of the manuscript includes several additions to improve clarity, reproducibility, and transparency. 1. The preprocessing workflow has been expanded and clarified. A detailed preprocessing pipeline and a flowchart have been added to explicitly describe data integration, normalization, discretization, missing-value imputation, and feature selection steps.&#x00a0; 2. Additional methodological details have been provided to strengthen reproducibility. The manuscript now specifies the random seed configuration used across Python, NumPy, and TensorFlow, as well as the exact data-splitting strategy implemented using stratified 10-fold cross-validation. The training and validation split within each fold is also clearly described to ensure that the experimental protocol can be replicated. 3. The computational environment has been documented, including hardware specifications, operating system, and software library versions. A reproducibility package and configuration file have also been referenced through an archived repository to facilitate independent verification. 4. The discussion has been expanded to better contextualize the clinical applicability of the proposed model. Additional text has been included to clarify that the current work focuses on computational methodology, while highlighting important limitations such as the use of retrospective datasets, the need for prospective validation, calibration assessment, and considerations for real-world clinical deployment. These revisions provide a more balanced interpretation of the model&#x2019;s potential clinical impact while maintaining the focus on methodological contributions.</p>
            </sec>
        </notes>
    </front>
    <body>
        <sec id="sec5" sec-type="intro">
            <title>Introduction</title>
            <p>Breast cancer is a heterogeneous disease and one of the leading causes of cancer-related death among women worldwide.
                <sup>
                    <xref ref-type="bibr" rid="ref1">1</xref>
                </sup> According to GLOBOCAN 2018, 11.6% of 9.6 million cancer cases were breast cancer, making it the most commonly diagnosed cancer.
                <sup>
                    <xref ref-type="bibr" rid="ref1">1</xref>
                </sup> This pattern is also found in developed countries, with an incidence rate of 54.5 per 100,000 women, especially in areas with a high Human Development Index (HDI).
                <sup>
                    <xref ref-type="bibr" rid="ref2">2</xref>
                </sup> Typically, breast cancer occurs in middle-aged and older women, but in recent times, cases among younger women under 40 have also been reported. Breast cancer in younger women often presents with more advanced stages and worse outcomes, contributing to higher mortality rates.
                <sup>
                    <xref ref-type="bibr" rid="ref3">3</xref>
                </sup> Early diagnosis and treatment improve survival rates significantly, emphasizing the need for accurate prognostic models.
                <sup>
                    <xref ref-type="bibr" rid="ref4">4</xref>
                </sup> The advent of high-throughput omics technologies allows researchers to explore complex diseases by measuring thousands of biological molecules simultaneously.
                <sup>
                    <xref ref-type="bibr" rid="ref5">5</xref>
                </sup> Combining multi-omics data with clinical features provides valuable insights into cancer progression and treatment responses, paving the way for predictive modeling of survival outcomes. Publicly available datasets like METABRIC and TCGA-BRCA have become essential resources for breast cancer research, enabling the development of survival prediction models.
                <sup>
                    <xref ref-type="bibr" rid="ref6">6</xref>
                </sup>
            </p>
            <p>Despite advances in predictive modeling, challenges remain. Traditional machine learning models often function as &#x201c;black-box&#x201d; models, limiting their clinical utility due to the lack of interpretability. Clinical decision-making requires models that provide transparent, actionable insights alongside accurate predictions. Additionally, the high dimensionality of multi-omics data increases the risk of overfitting, making it difficult for models to generalize effectively across different patient cohorts. There is a critical need for models that balance interpretability, predictive accuracy, and computational efficiency to address these challenges.</p>
            <p>Our study aims to bridge this gap by developing a novel deep learning model that integrates Bi-directional Long Short-Term Memory (BiLSTM) and Convolutional Neural Networks (CNN) for feature extraction, along with MRMR (Minimum Redundancy Maximum Relevance) feature selection to reduce dimensionality and enhance interpretability. This approach offers improved prediction accuracy and actionable insights, making the model more suitable for clinical adoption. In contrast to previous studies that rely primarily on clinical data, our model leverages multi-omics data (e.g., gene expression, DNA methylation, miRNA, and copy number alterations) to enhance predictive performance.</p>
            <p>Several studies have explored the potential of machine learning models for survival prediction. For example, Zhao et al.
                <sup>
                    <xref ref-type="bibr" rid="ref7">7</xref>
                </sup> combined gene expression data with clinical and pathological factors, achieving AUC values of 0.72 and 0.67 using artificial neural networks (ANN) and support vector machines (SVM). Goli et al.
                <sup>
                    <xref ref-type="bibr" rid="ref8">8</xref>
                </sup> employed support vector regression for survival prediction, with promising results for imbalanced datasets. Gevaert et al.
                <sup>
                    <xref ref-type="bibr" rid="ref9">9</xref>
                </sup> used Bayesian networks to combine microarray gene expression data with clinical information, achieving a maximum AUC of 0.845. Sun et al.
                <sup>
                    <xref ref-type="bibr" rid="ref10">10</xref>
                </sup> improved predictive performance by integrating genomic and imaging data, achieving an AUC of 0.828 &#x00b1; 0.034. Ma and Zhang
                <sup>
                    <xref ref-type="bibr" rid="ref11">11</xref>
                </sup> applied factorization autoencoders to multi-omics data, achieving AUCs of 0.74 and 0.825 for bladder cancer and brain glioma, respectively. Experienced medical professionals face challenges in treating invasive breast cancer because it is difficult to synthesize and analyze large amounts of data from multiple sources.
                <sup>
                    <xref ref-type="bibr" rid="ref12">12</xref>
                </sup> The increasing availability of omics data offers new opportunities for creating predictive algorithms but introduces challenges related to data integration, heterogeneity, and high dimensionality.
                <sup>
                    <xref ref-type="bibr" rid="ref5">5</xref>
                </sup>
            </p>
            <p>This research presents a novel deep learning algorithm combining BiLSTM and CNN architectures for survival prediction, validated using METABRIC and TCGA datasets. In addition to its predictive accuracy, the model offers interpretability through feature importance analysis, enhancing its relevance for clinical decision-making. By setting a five-year survival threshold, the model classifies patients as short-term or long-term survivors, supporting physicians in tailoring treatment plans and minimizing unnecessary interventions.</p>
            <p>Through the decision-level integration of multi-omics data, this study addresses challenges related to high dimensionality, overfitting, and model interpretability. The results demonstrate significant improvements over existing algorithms, offering a pathway toward more transparent, clinically applicable predictive models. The findings contribute to the growing field of personalized oncology, paving the way for future research and the development of prognostic tools for breast cancer.</p>
            <sec id="sec6">
                <title>Significance of the study</title>
                <p>Survival prediction in breast cancer remains a complex task due to the intrinsic high dimensionality, noise, and heterogeneity of multi-omics datasets, which pose significant challenges for conventional predictive models. Existing machine learning methods often fail to fully leverage the complementary information embedded across different omics layers and struggle to generate clinically interpretable outputs. In this study, we propose a hybrid BiLSTM+CNN deep learning architecture that effectively captures both temporal dependencies and hierarchical feature representations within integrated multi-omics data. The model demonstrates superior predictive performance on benchmark METABRIC and TCGA datasets, while incorporating interpretability mechanisms to enhance clinical relevance. By addressing both the data integration and interpretability bottlenecks, this work provides a robust and scalable framework for precision oncology applications, offering improved survival prediction capabilities that can directly inform personalized treatment strategies.</p>
            </sec>
        </sec>
        <sec id="sec7" sec-type="methods">
            <title>Methods</title>
            <sec id="sec8">
                <title>A. Datasets</title>
                <p>This study uses the METABRIC breast cancer dataset, consisting of 1980 patient records, available through the cBioPortal database (
                    <ext-link ext-link-type="uri" xlink:href="https://www.cbioportal.org/study/summary?id=brca_metabric">https://www.cbioportal.org/study/summary?id=brca_metabric</ext-link>).
                    <sup>
                        <xref ref-type="bibr" rid="ref13">13</xref>
                    </sup> The cBioPortal offers a web-based platform for exploring and visualizing multidimensional cancer genomics data, converting complex molecular profiling from cancer tissues and cell lines into readily understandable genetic, epigenetic, gene expression, and proteomic information. The dataset contains information from three data modalities: clinical profile, gene expression profile, and copy-number alteration (CNA) profile. Patients were grouped based on their survival outcomes into two categories: long-term survivors (&#x2265;5 years) with 1489 samples (labeled as &#x2018;0&#x2019;), and short-term survivors (&lt;5 years) with 491 samples (labeled as &#x2018;1&#x2019;). The median age at diagnosis for patients is 61 years, with an average survival duration of 125.1 months. 
                    <xref ref-type="table" rid="T1">
Table 1</xref> summarizes the key characteristics of the METABRIC dataset.</p>
                <table-wrap id="T1" orientation="portrait" position="float">
                    <label>
Table 1. </label>
                    <caption>
                        <title>Summary of the METABRIC dataset.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">Details</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">
Records</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Disease</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Breast cancer</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Number of patients</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">1980</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Survival time (years)</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">5</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Survival &#x2265; 5 years</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">1489</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Survival &lt; 5 years</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">491</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Number of modalities</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">3</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Modalities</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Clinical, Gene Expression, and CNA profile</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
                <p>The clinical features available in the METABRIC dataset include age at diagnosis, tumor size, estrogen receptor status, HER2 status, and stage at diagnosis. During preprocessing, two of the original 27 clinical features were removed due to missing data and redundancy, reducing the number of clinical features to 25. This feature reduction ensures that only the most relevant variables are retained, enhancing the predictive capacity of the proposed model. To provide deeper insights, a univariate t-test analysis was conducted on key clinical features to assess how they differ between short-term and long-term survivors. The results of the descriptive statistics and t-tests are presented in 
                    <xref ref-type="table" rid="T2">
Table 2</xref>. Although the METABRIC dataset contained 25 clinical features after preprocessing, the t-test analysis highlighted a subset of widely recognized prognostic indicators, including age, tumor size, estrogen receptor status, HER2 status, and stage at diagnosis. These variables were chosen because they are consistently associated with breast cancer survival outcomes and provide clinically interpretable insights, as also emphasized in prior studies.
                    <sup>
                        <xref ref-type="bibr" rid="ref20">14</xref>
                    </sup> The remaining clinical variables were retained for model training but are not individually reported here, as they exhibited limited statistical differentiation between survivor groups.</p>
                <table-wrap id="T2" orientation="portrait" position="float">
                    <label>
Table 2. </label>
                    <caption>
                        <title>Descriptive statistics and univariate t-test results for clinical data in METABRIC dataset.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">
Clinical feature</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Short-term survivors (Mean &#x00b1; SD)</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Long-term survivors (Mean &#x00b1; SD)</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">
t-value
</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">
p-value
</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Age at Diagnosis (years)</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">55.3 &#x00b1; 10.1</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">63.2 &#x00b1; 9.8</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">2.23</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.03</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Tumor Size (cm)</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">4.2 &#x00b1; 1.5</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">2.8 &#x00b1; 1.3</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">2.71</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.01</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Estrogen Receptor (%)</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">60 &#x00b1; 12</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">72 &#x00b1; 10</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">2.08</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.04</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">HER2 Status (%)</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">50 &#x00b1; 15</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">48 &#x00b1; 12</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.98</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.12</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Stage at Diagnosis</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">3.1 &#x00b1; 0.8</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">2.5 &#x00b1; 0.6</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">2.58</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.02</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
                <p>As shown in 
                    <xref ref-type="table" rid="T2">
Table 2</xref>, several clinical features differ significantly between the two survivor groups. Long-term survivors tended to be older at the time of diagnosis, with a mean age of 63.2 years, compared to 55.3 years for short-term survivors (p = 0.03). In addition, tumor sizes were notably smaller among long-term survivors (2.8 cm) compared to those in the short-term group (4.2 cm), with a p-value of 0.01. Similarly, the stage at diagnosis was lower for long-term survivors, indicating early detection and more favorable prognoses (p = 0.02). However, HER2 status showed no statistically significant difference (p = 0.12), suggesting it may not directly influence survival outcomes. These findings emphasize the role of early detection, tumor size, and diagnosis stage in predicting long-term survival, which are essential factors for clinical decision-making.</p>
                <p>The heterogeneity in the METABRIC dataset reflects the merging of data from multiple hospitals, leading to variability in clinical practices, treatment protocols, and laboratory standards. In addition, patients often received concomitant medications alongside primary treatments, such as supplements or medications to manage side effects. These factors underscore the complexity of predicting survival outcomes in breast cancer patients. To further validate the model&#x2019;s generalizability, the TCGA-BRCA dataset was employed. This dataset, available through the GDC portal (
                    <ext-link ext-link-type="uri" xlink:href="https://portal.gdc.cancer.gov/projects/TCGA-BRCA">https://portal.gdc.cancer.gov/projects/TCGA-BRCA
</ext-link>), contains 1080 patient records with the same three data modalities as METABRIC&#x2014;clinical profile, gene expression profile, and copy-number alteration (CNA) profile.
                    <sup>
                        <xref ref-type="bibr" rid="ref14">15</xref>
                    </sup> 
                    <xref ref-type="table" rid="T3">
Table 3</xref> below summarizes the details of the TCGA-BRCA dataset.</p>
                <table-wrap id="T3" orientation="portrait" position="float">
                    <label>
Table 3. </label>
                    <caption>
                        <title>Summary of the TCGA-BRCA dataset.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">Details</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">
Records</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Disease</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Breast cancer</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Number of patients</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">1080</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Survival time (years)</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">5</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Survival &gt; (5 years)</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">250</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Survival &lt; (5 years)</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">830</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Number of modalities</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">3</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Modalities</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Clinical, Gene Expr, and CNA profile</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
            </sec>
            <sec id="sec9">
                <title>B. Data augmentation</title>
                <p>Deep learning (DL) models have demonstrated remarkable achievements in tasks involving histological images, topographies, and clinical data. However, their performance with gene expression data remains constrained due to the complex nature and high dimensionality of such datasets, which often require thousands of instances to achieve reliable outcomes. To mitigate these challenges, Data Augmentation (DA) techniques can be adapted for transcriptomic data, although their application is less common than in imaging tasks. In this study, we employed noise injection and random rotation as augmentation strategies. Noise injection has been shown to be particularly effective for omics data, as it simulates the measurement variability inherent in RNA-seq and microarray platforms while generating synthetic yet biologically plausible training instances. Prior studies such as Islam et al. demonstrated that injecting noise into gene expression features improved classification robustness across cancer datasets.
                    <sup>
                        <xref ref-type="bibr" rid="ref24">16</xref>
                    </sup> Additionally, gene expression data introduces challenges due to its variability and susceptibility to the curse of dimensionality&#x2014;where datasets contain more features than available samples. As a result, DA techniques play a pivotal role in increasing the size of training datasets by generating synthetic data samples, thereby improving the generalization capacity of models.</p>
                <p>In this study, random rotation and noise injection techniques were applied to gene expression data as DA methods. The noise injection technique involved randomly selecting training samples and altering up to 25% of their features. The noise was generated from a normal distribution with a standard deviation of 0.2 and was added to the original feature values. To ensure data validity, the modified values were clamped within the range of [0, 1]. The selected standard deviation value (0.2) ensured that the augmented samples remained close to the original data instances. Random rotation, on the other hand, was adapted to the feature space rather than physical geometry. Each gene expression profile was treated as a high-dimensional vector, and random orthogonal transformations were applied to rotate these vectors while preserving their variance and overall structure. This produced synthetic samples that retained the statistical properties of the dataset but in alternative orientations of the feature space. Such transformations are conceptually related to PCA-based rotations and latent-space perturbations used in omics DA studies.
                    <sup>
                        <xref ref-type="bibr" rid="ref24">16</xref>
                    </sup>
                    <sup>,</sup>
                    <sup>
                        <xref ref-type="bibr" rid="ref25">17</xref>
                    </sup> From a biological perspective, these augmentations capture natural variability in gene&#x2013;gene correlation structures across patients or cohorts, helping the model learn robust interaction patterns instead of relying on fixed dependencies. A similar idea was demonstrated in a previous study, where latent-space mixing in single-cell RNA-seq generated biologically plausible samples that enhanced generalization.
                    <sup>
                        <xref ref-type="bibr" rid="ref25">17</xref>
                    </sup> Recent research on DL models for genomic datasets highlights the potential benefits of DA techniques, although the application of DA to genomic data remains relatively unexplored.
                    <sup>
                        <xref ref-type="bibr" rid="ref15">18</xref>,
                        <xref ref-type="bibr" rid="ref16">19</xref>
                    </sup> The integration of DA techniques in this study addresses imbalances in gene expression data and enhances the predictive capabilities of the model by preventing overfitting to limited sample sizes.</p>
                <p>To ensure reproducibility of the stochastic processes involved in data augmentation, model training, and dataset partitioning, all random operations were controlled using a fixed global random seed of 42 across NumPy, TensorFlow, and Python&#x2019;s built-in random module. For model evaluation, ten-fold cross-validation was implemented using sklearn.model_selection.StratifiedKFold (n_splits = 10, shuffle = True, random_state = 42) to maintain class distribution across folds. The same data splits were consistently applied across all modalities and compared algorithms to ensure fair evaluation. Within each fold, the training set was further divided into training and validation subsets using an 80/20 split with random_state = 42.</p>
            </sec>
            <sec id="sec10">
                <title>C. Pre-processing data</title>
                <p>This study utilized three key data modalities: clinical profile, gene expression profile, and copy-number alteration (CNA) profile. Each of these datasets underwent a thorough pre-processing pipeline to ensure data quality and consistency for analysis. The pre-processing steps involved handling missing values, normalization, feature discretization, and feature selection, all of which were necessary to prepare the data for the deep learning algorithm. 
                    <xref ref-type="fig" rid="f1">
Figure 1</xref> shows the flowchart of preprocessing data.</p>
                <fig fig-type="figure" id="f1" orientation="portrait" position="float">
                    <label>
Figure 1. </label>
                    <caption>
                        <title>Flowchart of preprocessing data.</title>
                    </caption>
                    <graphic id="gr1" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/197690/355e21ee-ec80-4f56-8556-c539a6cf740f_figure1.gif"/>
                </fig>
                <p>To address missing values in the gene expression and CNA datasets, the weighted k-nearest neighbor (KNN) algorithm was employed.
                    <sup>
                        <xref ref-type="bibr" rid="ref17">20</xref>
                    </sup> This algorithm identifies the nearest samples based on Euclidean distance and estimates missing values using a weighted average of neighbouring samples. The imputation process used k = 10 nearest neighbors, with Euclidean distance as the similarity metric and inverse distance weighting applied during estimation. After the imputation, the datasets were normalized to maintain a consistent scale for all features.
                    <sup>
                        <xref ref-type="bibr" rid="ref9">9</xref>
                    </sup> Following normalization, the CNA data was discretized into five categories: &#x2212;2, &#x2212;1, 0, 1, and 2, representing varying levels of copy number variation. Similarly, the gene expression values were categorized into three classes: &#x2212;1 (under-expressed genes), 0 (baseline genes), and 1 (over-expressed genes). These discretization steps ensured that the features were more interpretable and ready for machine learning processing.</p>
                <p>Given the high-dimensional nature of both the CNA and gene expression datasets, feature selection was essential to reduce dimensionality and improve the model&#x2019;s generalizability. The Maximum Relevance Minimum Redundancy (MRMR) algorithm was selected for this task due to its ability to identify features that are highly relevant to the target variable while minimizing redundancy among selected features. Alternative methods, such as LASSO regression and Principal Component Analysis (PCA), were considered; however, MRMR was chosen because it provided better interpretability and reduced the risk of overfitting in our experiments. The MRMR algorithm was implemented using the Mutual Information Difference (MID) criterion to rank and select relevant features, where mutual information was estimated using a discrete mutual information approach. The class variable was defined as a binary survival label (0 representing long-term survival and 1 representing short-term survival). Feature selection was performed using an incremental greedy forward selection strategy, where features were sequentially added based on their MRMR ranking.</p>
                <p>Among the three modalities used in this study, copy number alteration (CNA) and gene expression data posed significant dimensionality challenges, with each CNA sample containing over 26,000 features and gene expression profiles exceeding 24,000 features. When the number of features exceeds the number of observations, models tend to overfit easily, a problem commonly referred to as the curse of dimensionality. To address this issue, the feature selection process was conducted using a gradational strategy, where subsets of features were generated in increments of 100 and evaluated using the Area Under the Receiver Operating Characteristic Curve (AUC-ROC) as the performance metric. The MRMR algorithm was executed across feature set sizes ranging from 100 to 500, and the subset yielding the highest validation performance was selected. A 5-fold cross-validation strategy was applied during evaluation to ensure robustness and reduce the risk of overfitting. This tuning process identified 400 features for gene expression and 200 features for CNA as optimal, balancing predictive accuracy with model generalizability. The clinical dataset, which contained 27 initial variables, was reduced to 25 key features after preprocessing. These included well-known prognostic indicators such as hormone receptor status, tumor size, menopausal state, lymph node positivity, histological grade, treatment type, and surgical information, all of which have established relevance to breast cancer survival outcomes.</p>
                <p>From the CNA profile, the feature count was reduced from 26,298 to 200, and from the gene expression profile, it was narrowed down from 24,368 to 400. For the clinical dataset, the original 27 features were reduced to 25 after removing two features with missing data. The resulting pre-processed dataset, summarized in 
                    <xref ref-type="table" rid="T4">
Table 4</xref> below, served as the input for the proposed deep learning algorithm, enabling more accurate survival predictions.</p>
                <table-wrap id="T4" orientation="portrait" position="float">
                    <label>
Table 4. </label>
                    <caption>
                        <title>Pre-processed data.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">Data modality</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Total features</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">
Selected features</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Clinical</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">27</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">25</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">CNA profile</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">26298</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">200</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Gene expression</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">24368</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">400</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
            </sec>
            <sec id="sec11">
                <title>D. Convolutional Neural Network (CNN)</title>
                <p>Convolutional Neural Network (CNN) is a type of feed-forward neural network widely used for tasks involving image processing, natural language processing (NLP), and time series data prediction.
                    <sup>
                        <xref ref-type="bibr" rid="ref18">21</xref>
                    </sup> One of the key advantages of CNN is its local perception mechanism and weight sharing across different layers. This design significantly reduces the number of parameters, thereby improving the model&#x2019;s efficiency in training and generalization. A typical CNN model is composed of three essential components: the convolution layer, pooling layer, and fully connected layer. The convolution layers extract relevant features from the input data, though the extracted features may have a high dimensionality. To address this, a pooling layer is applied after each convolution layer, which reduces the feature dimensions and computational cost while retaining the most important information.</p>
                <p>While CNN has demonstrated exceptional performance in many domains, it has limited capacity to process large-scale, multi-modal data such as genomic and clinical datasets. Recently, researchers have focused on multi-source data integration to enhance the predictive capabilities of deep learning models. These advanced deep learning algorithms that combine multiple data modalities exhibit superior performance over models that rely solely on a single data source. The integration of CNN into such frameworks makes it a promising tool for tasks like cancer survival prediction by efficiently capturing feature patterns across different data types.</p>
            </sec>
            <sec id="sec12">
                <title>E. Bi-Directional LSTM (BiLSTM)</title>
                <p>The traditional Long Short-Term Memory (LSTM) network, though effective in modeling sequential data, processes information in only one direction&#x2014;either forward or backward in the sequence.
                    <sup>
                        <xref ref-type="bibr" rid="ref19">22</xref>
                    </sup> This limitation can hinder the model&#x2019;s ability to fully capture the temporal dependencies inherent in sequential datasets. To overcome this, the Bi-directional Long Short-Term Memory (BiLSTM) network was developed, enabling the processing of information in both directions&#x2014;forward and backward. The core idea of BiLSTM is to analyze sequences both front-to-back and back-to-front. In this model, one LSTM layer processes the sequence from the start to the end, while another layer processes it from the end to the start. This dual-directional processing allows the network to retain information from both past and future contexts, making it particularly useful for analyzing time series data and sequential inputs from multi-omics datasets.</p>
                <p>In this study, the input data from multi-omics sources is first processed by two BiLSTM layers in the initial module. The extracted features are then passed to the CNN layers in the subsequent module for further feature extraction and dimensionality reduction. The combined BiLSTM and CNN architecture ensures that both the temporal dependencies and spatial patterns in the data are captured. In the final stage, the fully connected layers generate predictions about patient survival, classifying breast cancer patients as either short-term or long-term survivors.</p>
            </sec>
            <sec id="sec13">
                <title>F. An improved deep learning algorithm: BiLSTM and CNN algorithm for prediction using multi-omics data</title>
                <p>This study presents an improved deep learning algorithm by integrating a Bi-directional Long Short-Term Memory (BiLSTM) network with a Convolutional Neural Network (CNN) to predict breast cancer survival and extract meaningful features from multi-omics data. The BiLSTM addresses the limitations of traditional LSTM networks, while CNN complements it by capturing the spatial patterns in the data. The proposed combination offers superior performance by leveraging the strengths of both models: BiLSTM for temporal sequence learning and CNN for feature extraction. An overview of the proposed model is illustrated in 
                    <xref ref-type="fig" rid="f2">
Figure 2</xref> below:</p>
                <fig fig-type="figure" id="f2" orientation="portrait" position="float">
                    <label>
Figure 2. </label>
                    <caption>
                        <title>An overview of the proposed improved deep learning algorithm namely BiLSTM+CNN.</title>
                    </caption>
                    <graphic id="gr2" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/197690/355e21ee-ec80-4f56-8556-c539a6cf740f_figure2.gif"/>
                </fig>
                <p>During the initial phase, both BiLSTM and CNN layers are configured with specific filters to extract key features. These features are processed through convolution and dense layers, generating a feature map that feeds into subsequent stages of the model. As described in,
                    <sup>
                        <xref ref-type="bibr" rid="ref20">14</xref>
                    </sup> the Glorot normal initializer is used to initialize the filter values, ensuring that the parameters follow a normal distribution with a mean of zero. A fixed seed value of 0.1 is used to maintain consistency in model training, preventing variation in results between different runs.</p>
                <p>Hyperparameter tuning was conducted using a grid search approach. The key parameters tuned included the number of layers, filter sizes, learning rate, and regularization strength. For each parameter combination, model performance was evaluated on the validation set using AUC-ROC as the primary metric. The final configuration (outlined in 
                    <xref ref-type="table" rid="T5">
Table 5</xref>) was selected based on the highest AUC and accuracy scores observed during cross-validation. This tuning process ensured optimal performance while preventing overfitting. The parameters and architecture of the BiLSTM+CNN algorithm are detailed in 
                    <xref ref-type="table" rid="T5">
Table 5</xref> below:</p>
                <table-wrap id="T5" orientation="portrait" position="float">
                    <label>
Table 5. </label>
                    <caption>
                        <title>Architecture and parameter related details of BiLSTM+CNN algorithm.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">Component</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Parameter</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">
Details</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">BiLSTM Layer</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">LSTM Layers</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">2</td>
                            </tr>
                            <tr>
                                <td colspan="1" rowspan="1"/>
                                <td align="left" colspan="1" rowspan="1" valign="top">Number of Hidden Units</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">32</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Convolutional Layer</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Convolutional Layers</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">2</td>
                            </tr>
                            <tr>
                                <td colspan="1" rowspan="1"/>
                                <td align="left" colspan="1" rowspan="1" valign="top">Filter Size</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">15 &#x00d7; 15</td>
                            </tr>
                            <tr>
                                <td colspan="1" rowspan="1"/>
                                <td align="left" colspan="1" rowspan="1" valign="top">Number of Filters</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">25</td>
                            </tr>
                            <tr>
                                <td colspan="1" rowspan="1"/>
                                <td align="left" colspan="1" rowspan="1" valign="top">Stride Size</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">2</td>
                            </tr>
                            <tr>
                                <td colspan="1" rowspan="1"/>
                                <td align="left" colspan="1" rowspan="1" valign="top">Padding</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Same</td>
                            </tr>
                            <tr>
                                <td colspan="1" rowspan="1"/>
                                <td align="left" colspan="1" rowspan="1" valign="top">Activation Function</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">ReLU</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Fully Connected Layer</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Number of Hidden Layers</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">2</td>
                            </tr>
                            <tr>
                                <td colspan="1" rowspan="1"/>
                                <td align="left" colspan="1" rowspan="1" valign="top">Hidden Units in Each Layer</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">150, 100</td>
                            </tr>
                            <tr>
                                <td colspan="1" rowspan="1"/>
                                <td align="left" colspan="1" rowspan="1" valign="top">Activation Function</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">TANH</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Output Layer</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Activation Function</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Sigmoid</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Training Configuration</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Number of Training Epochs</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">20</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Loss Function</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Loss Function Used</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Binary Cross-Entropy + L2 Regularization</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
                <p>Below is an overview of the BiLSTM+CNN algorithm:</p>
                <boxed-text id="B1" orientation="portrait" position="float">
                    <label>Algorithm 1. </label>
                    <caption>
                        <title>BiLSTM+CNN.</title>
                    </caption>
                    <p>

                        <bold>Input</bold> Dataset (Clinical, CNA, Gene exp), number of epochs N, number of folds K</p>
                    <p>

                        <bold>Output</bold> Extracted features
                        <list list-type="order">
                            <list-item>
                                <label>1.</label>
                                <p>Initialize the BiLSTM+CNN algorithm with the required parameters.</p>
                            </list-item>
                            <list-item>
                                <label>2.</label>
                                <p>Perform train-test split: TrainData, TestData.</p>
                            </list-item>
                            <list-item>
                                <label>3.</label>
                                <p>Partition TrainData into K subsets F
                                    <sub>1</sub>, &#x2026;, F
                                    <sub>K</sub>.</p>
                            </list-item>
                            <list-item>
                                <label>4.</label>
                                <p>

                                    <bold>For k = </bold>1 to 
                                    <bold>K</bold>:
&#x2003;&#x2022; Data_train = TrainData &#x2212; F<sub>k</sub>&#x2003;&#x2022; Data_valid = F<sub>k</sub></p>
                            </list-item>
                            <list-item>
                                <label>5.</label>
                                <p>For epoch e = 1 to N:
&#x2003;&#x2022; Train BiLSTM+CNN using Data_train.&#x2003;&#x2022; Validate the model with Data_valid.</p>
                            </list-item>
                            <list-item>
                                <label>6.</label>
                                <p>Test the model using TestData.</p>
                            </list-item>
                            <list-item>
                                <label>7.</label>
                                <p>End Procedure.</p>
                            </list-item>
                        </list>
                    </p>
                </boxed-text>
                <p>The pre-processed data serves as input for the BiLSTM layers, whose outputs are subsequently passed to the CNN layers for further feature transformation. Within the convolutional layers, a stride size of 2 is employed, meaning the filter shifts by 2 units across the input matrix during the convolution operation. Padding is applied to ensure that feature sizes remain consistent. A flattened layer follows, converting the multi-dimensional output into a format suitable for the dense layer, which comprises 150 hidden units. To mitigate overfitting, L2 regularization is incorporated within the CNN model.
                    <sup>
                        <xref ref-type="bibr" rid="ref21">23</xref>
                    </sup> The activation functions used include ReLU for the convolutional layer and sigmoid for the dense layer. The Adam optimizer
                    <sup>
                        <xref ref-type="bibr" rid="ref22">24</xref>
                    </sup> was employed for optimization, with binary cross-entropy serving as the loss function for the binary classification task of predicting patient survival.</p>
                <p>The proposed BiLSTM+CNN algorithm is divided into three phases:
                    <list list-type="bullet">
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Phase 1: The algorithm is trained using clinical data, CNA data, and gene expression data.</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Phase 2: A stacked feature set is created from the extracted features of the BiLSTM+CNN model.</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Phase 3: The stacked feature set is passed through a Random Forest (RF) algorithm for final classification.</p>
                        </list-item>
                    </list>
                </p>
                <p>The architecture of the BiLSTM+CNN Stacked RF model is depicted in 
                    <xref ref-type="fig" rid="f3">
Figure 3</xref> below:</p>
                <fig fig-type="figure" id="f3" orientation="portrait" position="float">
                    <label>
Figure 3. </label>
                    <caption>
                        <title>BiLSTM+CNN stacked RF architecture.</title>
                    </caption>
                    <graphic id="gr3" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/197690/355e21ee-ec80-4f56-8556-c539a6cf740f_figure3.gif"/>
                </fig>
                <p>The final classification employed a Random Forest (RF) model with n_estimators = 200, max_depth = None, random_state = 0, and class_weight = &#x2018;balanced&#x2019;. These parameters were selected through grid search, ensuring the best AUC and accuracy scores during cross-validation. This configuration offered the optimal balance between precision and recall, especially for the imbalanced classes in our dataset. Using class_weight = &#x2018;balanced&#x2019; mitigated the risk of overlooking minority classes, while setting max_depth = None enabled the model to capture complex feature interactions without overfitting.</p>
            </sec>
            <sec id="sec14">
                <title>G. Performance evaluation and metrics</title>
                <p>The performance of the proposed BiLSTM+CNN algorithm was evaluated using several metrics, including Sensitivity, Specificity, Precision, Accuracy, and the Area Under the Curve (AUC) of the Receiver Operating Characteristic (ROC) curve. The metrics are defined as follows:
                    <disp-formula id="e1">

                        <mml:math display="block">
                            <mml:mtext mathvariant="italic">Sensitivity</mml:mtext>
                            <mml:mo>=</mml:mo>
                            <mml:mfrac>
                                <mml:mi mathvariant="italic">Tp</mml:mi>
                                <mml:mrow>
                                    <mml:mi mathvariant="italic">Tp</mml:mi>
                                    <mml:mo>+</mml:mo>
                                    <mml:mi mathvariant="italic">Fn</mml:mi>
                                </mml:mrow>
                            </mml:mfrac>
                        </mml:math>

                        <label>(1)</label>
</disp-formula>

                    <disp-formula id="e2">

                        <mml:math display="block">
                            <mml:mtext mathvariant="italic">Specificity</mml:mtext>
                            <mml:mo>=</mml:mo>
                            <mml:mfrac>
                                <mml:mi mathvariant="italic">Tn</mml:mi>
                                <mml:mrow>
                                    <mml:mi mathvariant="italic">Tn</mml:mi>
                                    <mml:mo>+</mml:mo>
                                    <mml:mi mathvariant="italic">Fp</mml:mi>
                                </mml:mrow>
                            </mml:mfrac>
                        </mml:math>

                        <label>(2)</label>
</disp-formula>

                    <disp-formula id="e3">

                        <mml:math display="block">
                            <mml:mtext mathvariant="italic">Precision</mml:mtext>
                            <mml:mo>=</mml:mo>
                            <mml:mfrac>
                                <mml:mi mathvariant="italic">Tp</mml:mi>
                                <mml:mrow>
                                    <mml:mi mathvariant="italic">Tp</mml:mi>
                                    <mml:mo>+</mml:mo>
                                    <mml:mi mathvariant="italic">Fp</mml:mi>
                                </mml:mrow>
                            </mml:mfrac>
                        </mml:math>

                        <label>(3)</label>
</disp-formula>

                    <disp-formula id="e4">

                        <mml:math display="block">
                            <mml:mtext mathvariant="italic">Accuracy</mml:mtext>
                            <mml:mo>=</mml:mo>
                            <mml:mfrac>
                                <mml:mrow>
                                    <mml:mi mathvariant="italic">Tp</mml:mi>
                                    <mml:mo>+</mml:mo>
                                    <mml:mi mathvariant="italic">Tn</mml:mi>
                                </mml:mrow>
                                <mml:mrow>
                                    <mml:mi mathvariant="italic">Tp</mml:mi>
                                    <mml:mo>+</mml:mo>
                                    <mml:mi mathvariant="italic">Tn</mml:mi>
                                    <mml:mo>+</mml:mo>
                                    <mml:mi mathvariant="italic">Fp</mml:mi>
                                    <mml:mo>+</mml:mo>
                                    <mml:mi mathvariant="italic">Fn</mml:mi>
                                </mml:mrow>
                            </mml:mfrac>
                        </mml:math>

                        <label>(4)</label>
</disp-formula>
                </p>
                <p>Here, Tp (true positive), Tn (true negative), Fp (false positive), and Fn (false negative) denote the classification outcomes. Additionally, the AUC-ROC curve assesses the model&#x2019;s ability to distinguish between classes across various thresholds, providing a comprehensive view of performance beyond a single point metric.</p>
            </sec>
            <sec id="sec15">
                <title>H. Cross validation</title>
                <p>The ten-fold cross-validation approach was adopted for model evaluation, following recommendations from prior studies.
                    <sup>
                        <xref ref-type="bibr" rid="ref20">14</xref>
                    </sup> In this method, the dataset is randomly divided into ten equal subsets. For each fold, nine subsets are used for training, while one subset is held out for testing. This process ensures that every data point is used for both training and testing, thereby providing a more reliable performance estimate.</p>
                <p>Within each merged training set, 80% of the data is allocated for training the model, while the remaining 20% is reserved for validation to fine-tune hyperparameters and prevent overfitting. The Keras and TensorFlow libraries were employed for model implementation, ensuring computational efficiency and ease of experimentation.</p>
            </sec>
            <sec id="sec2">
                <title>I. Computational environment</title>
                <p>All experiments were conducted on a workstation running Ubuntu 20.04 LTS equipped with an NVIDIA GeForce RTX 3090 GPU (24 GB VRAM) and 64 GB RAM. The software stack comprised Python 3.9.7, TensorFlow 2.8.0, Keras 2.8.0, scikit-learn 1.0.2, NumPy 1.21.5, and pandas 1.3.5. A configuration file and step-by-step reproducibility protocol are provided in the archived software repository (
                    <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.5281/zenodo.15964646">https://doi.org/10.5281/zenodo.15964646</ext-link>).</p>
            </sec>
        </sec>
        <sec id="sec16" sec-type="results">
            <title>Results</title>
            <sec id="sec17">
                <title>A. Performance of the Improved Deep Learning Algorithm (BiLSTM+CNN)</title>
                <p>The proposed deep learning algorithm leverages BiLSTM and CNN for feature extraction from multi-omics data. The AUC metric from the ROC curve, along with accuracy, is used to evaluate the model&#x2019;s performance. 
                    <xref ref-type="fig" rid="f4">
Figure 4</xref> below shows the ROC curves of the BiLSTM+CNN compared to CNN for the METABRIC dataset. The AUC values are 0.90, 0.87, and 0.87 for clinical data, CNA, and gene expression data, respectively. To provide a more comprehensive view of the model&#x2019;s performance, we report the 95% confidence intervals (CI) for each modality. These CIs offer an estimate of variability across different trials, ensuring more reliable interpretation of results:
                    <list list-type="bullet">
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Clinical data: Accuracy = 0.90, 95% CI [0.8973, 0.9027]</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>CNA data: Accuracy = 0.867, 95% CI [0.868, 0.872]</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Gene expression data: Accuracy = 0.876, 95% CI [0.8788, 0.8812]
</p>
                        </list-item>
                    </list>
                </p>
                <fig fig-type="figure" id="f4" orientation="portrait" position="float">
                    <label>
Figure 4. </label>
                    <caption>
                        <title>ROC curve of improved deep learning algorithm (BiLSTM+CNN) and CNN as feature extractors on METABRIC data.</title>
                    </caption>
                    <graphic id="gr4" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/197690/355e21ee-ec80-4f56-8556-c539a6cf740f_figure4.gif"/>
                </fig>
                <p>These results indicate the model&#x2019;s robustness across different data modalities, though performance on gene expression data is slightly lower, reflecting the challenges posed by high-dimensional data. The third plot in the above 
                    <xref ref-type="fig" rid="f4">
Figure 4</xref> is presented with data points rather than a continuous line to highlight specific thresholds along the ROC curve. This visualization helps illustrate how key decision points, such as cutoff thresholds, impact the true positive (TPR) and false positive rates (FPR). Disconnected lines may occur due to discrete prediction values or gaps in the input data, especially when thresholds do not span the full range of possible values. While this approach enhances interpretability, future iterations could explore smoothing techniques to provide a continuous curve.</p>
            </sec>
            <sec id="sec18">
                <title>B. Addressing model limitations</title>
                <p>Our model&#x2019;s false positive rate was higher than expected, which could have clinical implications. To address potential overfitting and variance due to the small dataset size, we applied ten-fold cross-validation. The 1980-patient dataset was split into ten subsets, with nine subsets for training and one for testing. Each training set was further divided into 80% for training and 20% for validation.</p>
                <p>We combined extracted features from BiLSTM and CNN into a stacked feature set, which was then classified using a Random Forest (RF) algorithm. As previous studies show, RF performs better with stacked features compared to other classifiers.
                    <sup>
                        <xref ref-type="bibr" rid="ref10">10</xref>
                    </sup> Performance metrics, including sensitivity, specificity, and precision, were calculated to assess the model&#x2019;s effectiveness.</p>
            </sec>
            <sec id="sec19">
                <title>C. ROC curve and comparison with other algorithms</title>
                <p>
                    <xref ref-type="fig" rid="f4">
Figure 4</xref> above presents the ROC curve comparing BiLSTM+CNN and CNN feature extractors on METABRIC data. The results demonstrate superior AUC values for BiLSTM+CNN across different modalities. 
                    <xref ref-type="table" rid="T6">
Table 6</xref> below summarizes the comparison of AUC and accuracy with existing algorithms:</p>
                <table-wrap id="T6" orientation="portrait" position="float">
                    <label>
Table 6. </label>
                    <caption>
                        <title>Comparison of improved deep learning algorithm (BiLSTM+CNN).</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="2" rowspan="1" valign="top">Algorithm</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">AUC</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">
ACC</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="3" valign="top">BiLSTM+CNN Stacked RF</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">BiLSTM+CNN-clinical
</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>0.91</bold>
</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>0.88</bold>
</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">BiLSTM+CNN-cna
</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>0.87</bold>
</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>0.85</bold>
</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">BiLSTM+CNN-gene expression</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.87</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.82</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="3" valign="top">Heterogeneous Stacked RF [28]</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">CNN-clinical
</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.85</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.81</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">DNN-cna
</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.72</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.80</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">CNN-gene expression</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.90</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.80</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="3" valign="top">Stacked RF [28]</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">CNN-clinical
</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.83</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.80</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">CNN-cna
</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.75</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.74</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">CNN-gene expression</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.92</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.80</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="3" valign="top">MDNNMD [28]</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">DNN-clinical
</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.81</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.79</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">DNN-cna
</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.61</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.76</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">DNN-gene expression</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.76</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.74</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="3" valign="top">SiGaAtCNN Stacked RF [8]</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">SiGaAtCNN-clinical
</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.86</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.81</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">SiGaAtCNN-cna</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.83</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.84</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">SiGaAtCNN-gene expression</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>0.95</bold>
</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>0.89</bold>
</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
                <p>The bold values in 
                    <xref ref-type="table" rid="T6">
Table 6</xref> highlight the best-performing results for each data modality (clinical, CNA, and gene expression) across all compared algorithms. The results clearly demonstrate that the proposed BiLSTM+CNN algorithm performs better than previous algorithms across multiple data modalities. However, the results for the gene expression modality indicate that the SiGaAtCNN Stacked RF model achieved superior performance, outperforming the proposed BiLSTM+CNN model in that specific category. The comparison included models such as MDNNMD, SiGaAtCNN, and Heterogeneous Stacked RF. As shown in 
                    <xref ref-type="table" rid="T7">
Table 7</xref> below, our algorithm outperforms others in terms of accuracy, precision, sensitivity, and Matthews correlation coefficient (MCC).</p>
                <table-wrap id="T7" orientation="portrait" position="float">
                    <label>
Table 7. </label>
                    <caption>
                        <title>Comparison of classification performance of Improved Deep Learning Algorithm (BiLSTM+CNN) with previous works on METABRIC data.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">Algorithm</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Acc</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Pre</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Sn</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">
Mcc</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">BiLSTM+CNN Stacked RF</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.98</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.95</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">1.0</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.81</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Heterogeneous Stacked RF [28]</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.97</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.98</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.97</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">-</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Stacked RF [8]</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.90</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.84</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.75</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.73</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">MDNNMD [28]</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.83</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.75</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.45</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.47</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">SiGaAtCNN Stacked RF [8]</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.91</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.84</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.80</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.77</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
            </sec>
            <sec id="sec20">
                <title>D. Validation on TCGA dataset</title>
                <p>To further validate the performance, we used the TCGA-BRCA dataset.
                    <sup>
                        <xref ref-type="bibr" rid="ref14">15</xref>
                    </sup> This dataset contains 250 long-term survivors and 830 short-term survivors, with data modalities matching those in the METABRIC dataset. Pre-processing was conducted using the same steps outlined in Sections B and C.</p>
                <p>
                    <xref ref-type="fig" rid="f5">
Figure 5</xref> presents the ROC curve for the TCGA dataset, demonstrating that the BiLSTM+CNN Stacked RF algorithm maintains high performance across datasets. Below are the performance metrics along with the 95% confidence intervals (CI):
                    <list list-type="bullet">
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Clinical data: Accuracy = 0.739, 95% CI [0.737, 0.741]</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>CNA data: Accuracy = 0.903, 95% CI [0.900, 0.906]</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Gene expression data: Accuracy = 0.964, 95% CI [0.962, 0.965]</p>
                        </list-item>
                    </list>
                </p>
                <fig fig-type="figure" id="f5" orientation="portrait" position="float">
                    <label>
Figure 5. </label>
                    <caption>
                        <title>ROC curve of improved deep learning algorithm (BiLSTM+CNN) and CNN as feature extractors on TCGA data.</title>
                    </caption>
                    <graphic id="gr5" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/197690/355e21ee-ec80-4f56-8556-c539a6cf740f_figure5.gif"/>
                </fig>
                <p>These results show that the model generalizes well to the TCGA dataset, especially on gene expression data, where it achieves high accuracy. The 95% CI for each modality further supports the robustness of the proposed model. Despite inherent differences between the METABRIC and TCGA datasets, the BiLSTM+CNN model achieves high accuracy across all data modalities. The results of our algorithm are compared with other state-of-the-art algorithms in 
                    <xref ref-type="table" rid="T8">
Table 8</xref> as follows:</p>
                <table-wrap id="T8" orientation="portrait" position="float">
                    <label>
Table 8. </label>
                    <caption>
                        <title>Comparison of classification performance of proposed Improved Deep Learning Algorithm (BiLSTM+CNN) with previous works on TCGA data.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">Algorithm</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Acc</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Pre</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Sn</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">MCC</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">BiLSTM+CNN stacked RF</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.98</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.87</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.93</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.80</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">SiGaAtCNN Stacked RF [8]</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.91</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.84</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.80</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.77</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Stacked RF [8]</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.92</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.83</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.80</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.76</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
            </sec>
        </sec>
        <sec id="sec21" sec-type="discussion">
            <title>Discussion</title>
            <p>The results confirm that BiLSTM+CNN, when combined with RF-based classification, offers significant improvements over existing algorithms. The algorithm achieved 98% accuracy, 1.0 sensitivity, 0.95 precision, and 0.81 MCC on METABRIC data, and 98% accuracy, 0.87 precision, 0.93 sensitivity, and 0.80 MCC on TCGA data.</p>
            <p>The combination of CNN and BiLSTM allows the model to effectively handle both time-series and spatial data, enhancing predictive performance. However, challenges remain with gene expression data, which require further research and hyperparameter tuning. Nonetheless, the strong performance across multiple datasets supports the potential of this model for personalized treatment and clinical decision-making.</p>
            <p>In terms of classification, the BiLSTM+CNN model outputs probabilities ranging between 0 and 1 for each class. To convert these probabilities into binary labels (0/1), a thresholding technique was employed. We used the validation set to determine the optimal threshold, selecting the value that maximized the AUC-ROC score. This approach ensures the best balance between sensitivity and specificity, especially when dealing with imbalanced class distributions. The same threshold was applied to the test set to compute the final performance metrics reported in this study. This threshold optimization ensures that the reported metrics&#x2014;accuracy, sensitivity, specificity, and precision&#x2014;accurately reflect the model&#x2019;s true performance under realistic conditions.</p>
            <p>Although the proposed model demonstrates strong predictive performance, the present study focuses primarily on the computational and methodological aspects of survival prediction. Biological interpretation of selected features and their association with known breast cancer pathways was not explored in this work. Future studies may integrate pathway enrichment analysis or gene importance analysis to improve interpretability and facilitate clinical translation.</p>
            <p>While the results are promising, several limitations must be acknowledged before clinical deployment can be considered. First, both METABRIC and TCGA-BRCA are retrospective public datasets; prospective validation in diverse, real-world clinical settings will be necessary to confirm generalisability across different ethnic populations, treatment protocols, and data-acquisition standards. Second, the current model was trained and evaluated under controlled preprocessing conditions; integrating it into a clinical workflow would require robust data-harmonisation pipelines and regular model recalibration to account for distributional shift. Third, model predictions have not yet been evaluated for calibration (i.e., the degree to which predicted probabilities reflect true event rates), which is an important property for clinical risk communication. Fourth, regulatory approval and clinician trust will require prospective clinical trials and human-factors evaluation. Consequently, statements regarding clinical applicability should be interpreted as indicating research potential rather than immediate readiness for deployment.</p>
            <sec id="sec22">
                <title>Comparison with relevant literature</title>
                <p>Our study builds upon existing research that utilizes multi-omics data for survival prediction. Curtis et al.
                    <sup>
                        <xref ref-type="bibr" rid="ref13">13</xref>
                    </sup> identified prognostic biomarkers using a multidimensional competition-based framework with the METABRIC dataset, while our study advances this work by integrating BiLSTM and CNN architectures for capturing temporal and spatial patterns across data modalities. Unlike the framework by Curtis et al.,
                    <sup>
                        <xref ref-type="bibr" rid="ref13">13</xref>
                    </sup> which focused primarily on identifying subgroups, our model emphasizes multi-omics data integration for improved survival predictions and interpretability.</p>
                <p>Additionally, Yousefi et al.
                    <sup>
                        <xref ref-type="bibr" rid="ref7">7</xref>
                    </sup> and Mobadersany et al.
                    <sup>
                        <xref ref-type="bibr" rid="ref15">18</xref>
                    </sup> employed convolutional networks for cancer survival outcome predictions, but their models primarily focused on histological data. In contrast, our model integrates clinical, CNA, and gene expression data, providing a more comprehensive and interpretable prediction framework. This integration allows the model to extract complex patterns that go beyond histological data alone. The study by Jadoon et al.
                    <sup>
                        <xref ref-type="bibr" rid="ref23">25</xref>
                    </sup> proposed a heterogeneous multiple kernel learning approach for breast cancer prognosis, addressing the challenge of multimodal data. While their approach is robust, our deep learning-based solution offers enhanced predictive performance through the combined use of BiLSTM and CNN architectures, which capture both sequential and spatial information across data types. Similarly, Phan et al.
                    <sup>
                        <xref ref-type="bibr" rid="ref6">6</xref>
                    </sup> demonstrated the use of machine learning models for decoding breast cancer with multi-omics data but faced challenges related to model interpretability and high dimensionality. Our approach, with MRMR feature selection, addresses these challenges by reducing dimensionality while retaining the most informative features.</p>
                <p>In summary, our model offers a novel combination of deep learning models and feature selection techniques to provide actionable clinical insights. The use of decision-level integration ensures robust predictions across datasets, with significant improvements observed on both METABRIC and TCGA datasets. These comparisons highlight how our work advances the field by building on previous methodologies while addressing key limitations, such as the interpretability and scalability of predictive models.</p>
            </sec>
        </sec>
        <sec id="sec23" sec-type="conclusion">
            <title>Conclusion</title>
            <p>Over the past two decades, significant progress has been made in the treatment of primary breast cancer, with advancements in early detection, prognosis, and treatment leading to a notable decrease in mortality rates. However, breast cancer continues to pose challenges, particularly in terms of early detection and precise survival prediction. The heterogeneity in clinical outcomes and the complexity associated with genetic variations present challenges for oncologists in devising optimal treatment plans. Therefore, developing intelligent systems to enhance breast cancer diagnosis and treatment remains essential.</p>
            <p>This research introduced an improved deep learning algorithm (BiLSTM+CNN) aimed at benefiting both individuals with breast cancer and healthcare practitioners. The proposed algorithm utilizes a stacked ensemble framework, combining BiLSTM and CNN for feature extraction and a boosted Random Forest (RF) for survival prediction. The study leverages multi-omics data, including clinical data, copy number alteration data, and gene expression data. The features extracted from these modalities serve as input to the boosted RF classifier, resulting in superior survival prediction.</p>
            <p>Our experimental results demonstrate that the proposed deep learning model (BiLSTM+CNN) outperformed existing models, achieving an accuracy of 98%. Furthermore, the versatility of the model suggests its applicability to other aggressive cancers such as cervical cancer, oral cancer, and lung cancer. By integrating multiple data modalities, the proposed approach enhances the robustness and reliability of predictions.</p>
            <p>Future work could explore the inclusion of additional omics data, such as pathway data, gene methylation profiles, and miRNA expression. Expanding the model&#x2019;s capability to handle various cancer types would also contribute to advancing personalized treatment approaches and clinical decision-making.</p>
        </sec>
        <sec id="sec24">
            <title>Ethical considerations</title>
            <p>Not applicable.</p>
        </sec>
        <sec id="sec25">
            <title>Software availability</title>
            <p>

                <list list-type="bullet">
                    <list-item>
                        <label>&#x2022;</label>
                        <p>Source code available from: 
                            <ext-link ext-link-type="uri" xlink:href="https://github.com/NNasarudin/CNN-BiLSTM-for-Breast-Cancer-Survival-Prediction-Based-on-Multi-Omics-Data.git">https://github.com/NNasarudin/CNN-BiLSTM-for-Breast-Cancer-Survival-Prediction-Based-on-Multi-Omics-Data.git</ext-link>
                        </p>
                    </list-item>
                    <list-item>
                        <label>&#x2022;</label>
                        <p>Archived software available from: 
                            <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.5281/zenodo.15964646">https://doi.org/10.5281/zenodo.15964646</ext-link>
                        </p>
                    </list-item>
                    <list-item>
                        <label>&#x2022;</label>
                        <p>License: GNU Lesser General Public License v3.0.</p>
                    </list-item>
                </list>
            </p>
        </sec>
    </body>
    <back>
        <sec id="sec28" sec-type="data-availability">
            <title>Data availability</title>
            <p>Data used in this research are available from the cBioPortal for Cancer Genomics and the Genomic Data Commons (GDC) Data Portal.
                <list list-type="bullet">
                    <list-item>
                        <label>&#x2022;</label>
                        <p>cBioPortal: Breast Cancer (METABRIC, Nature 2012 &amp; Nat Commun 2016). 
                            <ext-link ext-link-type="uri" xlink:href="https://www.cbioportal.org/study/summary?id=brca_metabric">https://www.cbioportal.org/study/summary?id=brca_metabric</ext-link>
                        </p>
                    </list-item>
                    <list-item>
                        <label>&#x2022;</label>
                        <p>GDC Data Portal: Breast Invasive Carcinoma (TCGA-BRCA): Accession number: phs000178. 
                            <ext-link ext-link-type="uri" xlink:href="https://portal.gdc.cancer.gov/projects/TCGA-BRCA">https://portal.gdc.cancer.gov/projects/TCGA-BRCA</ext-link>
                        </p>
                    </list-item>
                </list>
            </p>
        </sec>
        <ack>
            <title>Acknowledgements</title>
            <p>Not applicable.</p>
        </ack>
        <ref-list>
            <title>References</title>
            <ref id="ref1">
                <label>1</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Bray</surname>
                            <given-names>F</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ferlay</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Soerjomataram</surname>
                            <given-names>I</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Global cancer statistics 2018: GLOBOCAN estimates of incidence and mortality worldwide for 36 cancers in 185 countries.</article-title>
                    <source>

                        <italic toggle="yes">CA Cancer J. Clin.</italic>
</source>
                    <year>2018</year>;<volume>68</volume>(<issue>6</issue>):<fpage>394</fpage>&#x2013;<lpage>424</lpage>.
                    <pub-id pub-id-type="pmid">30207593</pub-id>
                    <pub-id pub-id-type="doi">10.3322/caac.21492</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref2">
                <label>2</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Huang</surname>
                            <given-names>J</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Global incidence and mortality of breast cancer: a trend analysis.</article-title>
                    <source>

                        <italic toggle="yes">Aging.</italic>
</source>
                    <year>Feb. 2021</year>;<volume>13</volume>(<issue>4</issue>):<fpage>5748</fpage>&#x2013;<lpage>5803</lpage>.
                    <pub-id pub-id-type="pmid">33592581</pub-id>
                    <pub-id pub-id-type="doi">10.18632/aging.202502</pub-id>
                    <pub-id pub-id-type="pmcid">PMC7950292</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref3">
                <label>3</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Sopik</surname>
                            <given-names>V</given-names>
                        </name>
</person-group>:
                    <article-title>International variation in breast cancer incidence and mortality in young women.</article-title>
                    <source>

                        <italic toggle="yes">Breast Cancer Res. Treat.</italic>
</source>
                    <year>Apr. 2021</year>;<volume>186</volume>(<issue>2</issue>):<fpage>497</fpage>&#x2013;<lpage>507</lpage>.
                    <pub-id pub-id-type="pmid">33145697</pub-id>
                    <pub-id pub-id-type="doi">10.1007/s10549-020-06003-8</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref4">
                <label>4</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Paulin</surname>
                            <given-names>F</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Santhakumaran</surname>
                            <given-names>DA</given-names>
                        </name>
</person-group>:
                    <article-title>Extracting Rules from Feed Forward Neural Networks for Diagnosing Breast Cancer.</article-title>
                    <source>

                        <italic toggle="yes">Artif. Intell. Syst. Mach. Learn.</italic>
</source>
                    <year>2009</year>;<volume>1</volume>(<issue>4</issue>):<fpage>Art. no. 4</fpage>.</mixed-citation>
            </ref>
            <ref id="ref5">
                <label>5</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Zhao</surname>
                            <given-names>L</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>DeepOmix: A scalable and interpretable multi-omics deep learning framework and application in cancer survival analysis.</article-title>
                    <source>

                        <italic toggle="yes">Comput. Struct. Biotechnol. J.</italic>
</source>
                    <year>Jan. 2021</year>;<volume>19</volume>:<fpage>2719</fpage>&#x2013;<lpage>2725</lpage>.
                    <pub-id pub-id-type="pmid">34093987</pub-id>
                    <pub-id pub-id-type="doi">10.1016/j.csbj.2021.04.067</pub-id>
                    <pub-id pub-id-type="pmcid">PMC8131983</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref6">
                <label>6</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Phan</surname>
                            <given-names>JH</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Hoffman</surname>
                            <given-names>R</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Kothari</surname>
                            <given-names>S</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <chapter-title>Integration of multi-modal biomedical data to predict cancer grade and patient survival.</chapter-title>
                    <source>

                        <italic toggle="yes">2016 IEEE-EMBS International Conference on Biomedical and Health Informatics (BHI).</italic>
</source>
                    <year>Feb. 2016</year>; pp.<fpage>577</fpage>&#x2013;<lpage>580</lpage>.
                    <pub-id pub-id-type="doi">10.1109/BHI.2016.7455963</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref7">
                <label>7</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Zhao</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Tang</surname>
                            <given-names>Y</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Kim</surname>
                            <given-names>H</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Machine Learning With K-Means Dimensional Reduction for Predicting Survival Outcomes in Patients With Breast Cancer.</article-title>
                    <source>

                        <italic toggle="yes">Cancer Inform.</italic>
</source>
                    <year>Jan. 2018</year>;<volume>17</volume>:<fpage>1176935118810215</fpage>.
                    <pub-id pub-id-type="pmid">30455569</pub-id>
                    <pub-id pub-id-type="doi">10.1177/1176935118810215</pub-id>
                    <pub-id pub-id-type="pmcid">PMC6238199</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref8">
                <label>8</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Goli</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Mahjub</surname>
                            <given-names>H</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Faradmal</surname>
                            <given-names>J</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Survival Prediction and Feature Selection in Patients with Breast Cancer Using Support Vector Regression.</article-title>
                    <source>

                        <italic toggle="yes">Comput. Math. Methods Med.</italic>
</source>
                    <year>2016</year>;<volume>2016</volume>(<issue>1</issue>):<fpage>1</fpage>&#x2013;<lpage>12</lpage>.
                    <pub-id pub-id-type="pmid">27882074</pub-id>
                    <pub-id pub-id-type="doi">10.1155/2016/2157984</pub-id>
                    <pub-id pub-id-type="pmcid">PMC5108874</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref9">
                <label>9</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Gevaert</surname>
                            <given-names>O</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Smet</surname>
                            <given-names>FD</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Timmerman</surname>
                            <given-names>D</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Predicting the prognosis of breast cancer by integrating clinical and microarray data with Bayesian networks.</article-title>
                    <source>

                        <italic toggle="yes">Bioinformatics.</italic>
</source>
                    <year>Jul. 2006</year>;<volume>22</volume>(<issue>14</issue>):<fpage>e184</fpage>&#x2013;<lpage>e190</lpage>.
                    <pub-id pub-id-type="pmid">16873470</pub-id>
                    <pub-id pub-id-type="doi">10.1093/bioinformatics/btl230</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref10">
                <label>10</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Sun</surname>
                            <given-names>D</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Li</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Tang</surname>
                            <given-names>B</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Integrating genomic data and pathological images to effectively predict breast cancer clinical outcome.</article-title>
                    <source>

                        <italic toggle="yes">Comput. Methods Prog. Biomed.</italic>
</source>
                    <year>Jul. 2018</year>;<volume>161</volume>:<fpage>45</fpage>&#x2013;<lpage>53</lpage>.
                    <pub-id pub-id-type="pmid">29852967</pub-id>
                    <pub-id pub-id-type="doi">10.1016/j.cmpb.2018.04.008</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref11">
                <label>11</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Ma</surname>
                            <given-names>T</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Zhang</surname>
                            <given-names>A</given-names>
                        </name>
</person-group>:
                    <chapter-title>Multi-view Factorization AutoEncoder with Network Constraints for Multi-omic Integrative Analysis.</chapter-title>
                    <source>

                        <italic toggle="yes">2018 IEEE International Conference on Bioinformatics and Biomedicine (BIBM).</italic>
</source>
                    <year>Dec. 2018</year>; pp.<fpage>702</fpage>&#x2013;<lpage>707</lpage>.
                    <pub-id pub-id-type="doi">10.1109/BIBM.2018.8621379</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref12">
                <label>12</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Martin</surname>
                            <given-names>LR</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Williams</surname>
                            <given-names>SL</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Haskard</surname>
                            <given-names>KB</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>The challenge of patient adherence.</article-title>
                    <source>

                        <italic toggle="yes">Ther. Clin. Risk Manag.</italic>
</source>
                    <year>Sep. 2005</year>;<volume>1</volume>(<issue>3</issue>):<fpage>189</fpage>&#x2013;<lpage>199</lpage>.
                    <pub-id pub-id-type="doi">10.2147/tcrm.s12160382</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref13">
                <label>13</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Curtis</surname>
                            <given-names>C</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>The genomic and transcriptomic architecture of 2,000 breast tumours reveals novel subgroups.</article-title>
                    <source>

                        <italic toggle="yes">Nature.</italic>
</source>
                    <year>Jun. 2012</year>;<volume>486</volume>(<issue>7403</issue>):<fpage>346</fpage>&#x2013;<lpage>352</lpage>.
                    <pub-id pub-id-type="pmid">22522925</pub-id>
                    <pub-id pub-id-type="doi">10.1038/nature10983</pub-id>
                    <pub-id pub-id-type="pmcid">PMC3440846</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref20">
                <label>14</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Arya</surname>
                            <given-names>N</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Saha</surname>
                            <given-names>S</given-names>
                        </name>
</person-group>:
                    <article-title>Multi-modal advanced deep learning architectures for breast cancer survival prediction.</article-title>
                    <source>

                        <italic toggle="yes">Knowl.-Based Syst.</italic>
</source>
                    <year>Jun. 2021</year>;<volume>221</volume>:<fpage>106965</fpage>.
                    <pub-id pub-id-type="doi">10.1016/j.knosys.2021.106965</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref14">
                <label>15</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Tomczak</surname>
                            <given-names>K</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Czerwi&#x0144;ska</surname>
                            <given-names>P</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Wiznerowicz</surname>
                            <given-names>M</given-names>
                        </name>
</person-group>:
                    <article-title>The Cancer Genome Atlas (TCGA): an immeasurable source of knowledge.</article-title>
                    <source>

                        <italic toggle="yes">Contemp. Oncol. Onkol.</italic>
</source>
                    <year>2015</year>;<volume>19</volume>(<issue>1A</issue>):<fpage>A68</fpage>&#x2013;<lpage>A77</lpage>.
                    <pub-id pub-id-type="pmid">25691825</pub-id>
                    <pub-id pub-id-type="doi">10.5114/wo.2014.47136</pub-id>
                    <pub-id pub-id-type="pmcid">PMC4322527</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref24">
                <label>16</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Moreno-Barea</surname>
                            <given-names>FJ</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Jerez</surname>
                            <given-names>JM</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Franco</surname>
                            <given-names>L</given-names>
                        </name>
</person-group>:
                    <chapter-title>GAN-based data augmentation for prediction improvement using gene expression data in cancer.</chapter-title>
                    <source>

                        <italic toggle="yes">International Conference on Computational Science.</italic>
</source> Vol. <volume>13352</volume>.
                    <publisher-loc>Cham</publisher-loc>;
                    <publisher-name>Springer International Publishing</publisher-name>; <year>2022</year> <month>Jun</month>; pp. <fpage>28</fpage>&#x2013;<lpage>42</lpage>.
                    <pub-id pub-id-type="doi">10.1007/978-3-031-08757-8_3</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref25">
                <label>17</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Nouri</surname>
                            <given-names>N</given-names>
                        </name>
</person-group>:
                    <article-title>Single-cell RNA-seq data augmentation using generative Fourier transformer.</article-title>
                    <source>

                        <italic toggle="yes">Commun. Biol.</italic>
</source>
                    <year>2025</year> <month>Jan</month> <day>22</day>;<volume>8</volume>(<issue>1</issue>):<fpage>113</fpage>.
                    <pub-id pub-id-type="pmid">39843603</pub-id>
                    <pub-id pub-id-type="doi">10.1038/s42003-025-07552-8</pub-id>
                    <pub-id pub-id-type="pmcid">PMC11754799</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref15">
                <label>18</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Cheerla</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Gevaert</surname>
                            <given-names>O</given-names>
                        </name>
</person-group>:
                    <article-title>Deep learning with multimodal representation for pancancer prognosis prediction.</article-title>
                    <source>

                        <italic toggle="yes">Bioinformatics.</italic>
</source>
                    <month>Jul.</month> <year>2019</year>;<volume>35</volume>(<issue>14</issue>):<fpage>i446</fpage>&#x2013;<lpage>i454</lpage>.
                    <pub-id pub-id-type="pmid">31510656</pub-id>
                    <pub-id pub-id-type="doi">10.1093/bioinformatics/btz342</pub-id>
                    <pub-id pub-id-type="pmcid">PMC6612862</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref16">
                <label>19</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Vale-Silva</surname>
                            <given-names>LA</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Rohr</surname>
                            <given-names>K</given-names>
                        </name>
</person-group>:
                    <article-title>Long-term cancer survival prediction using multimodal deep learning.</article-title>
                    <source>

                        <italic toggle="yes">Sci. Rep.</italic>
</source>
                    <month>Jun.</month> <year>2021</year>;<volume>11</volume>(<issue>1</issue>):<fpage>13505</fpage>.
                    <pub-id pub-id-type="pmid">34188098</pub-id>
                    <pub-id pub-id-type="doi">10.1038/s41598-021-92799-4</pub-id>
                    <pub-id pub-id-type="pmcid">PMC8242026</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref17">
                <label>20</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Al-Helali</surname>
                            <given-names>B</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Chen</surname>
                            <given-names>Q</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Xue</surname>
                            <given-names>B</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>A new imputation method based on genetic programming and weighted KNN for symbolic regression with incomplete data.</article-title>
                    <source>

                        <italic toggle="yes">Soft Comput.</italic>
</source>
                    <month>Apr.</month> <year>2021</year>;<volume>25</volume>(<issue>8</issue>):<fpage>5993</fpage>&#x2013;<lpage>6012</lpage>.
                    <pub-id pub-id-type="doi">10.1007/s00500-021-05590-y</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref18">
                <label>21</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>LeCun</surname>
                            <given-names>Y</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Bottou</surname>
                            <given-names>L</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Bengio</surname>
                            <given-names>Y</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Gradient-based learning applied to document recognition.</article-title>
                    <source>

                        <italic toggle="yes">Proc. IEEE.</italic>
</source>
                    <month>Nov.</month> <year>1998</year>;<volume>86</volume>(<issue>11</issue>):<fpage>2278</fpage>&#x2013;<lpage>2324</lpage>.
                    <pub-id pub-id-type="doi">10.1109/5.726791</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref19">
                <label>22</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Hochreiter</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Schmidhuber</surname>
                            <given-names>J</given-names>
                        </name>
</person-group>:
                    <article-title>Long Short-Term Memory.</article-title>
                    <source>

                        <italic toggle="yes">Neural Comput.</italic>
</source>
                    <month>Nov.</month> <year>1997</year>;<volume>9</volume>(<issue>8</issue>):<fpage>1735</fpage>&#x2013;<lpage>1780</lpage>.
                    <pub-id pub-id-type="doi">10.1162/neco.1997.9.8.1735</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref21">
                <label>23</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Ganaie</surname>
                            <given-names>MA</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Hu</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Malik</surname>
                            <given-names>AK</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Ensemble deep learning: A review.</article-title>
                    <source>

                        <italic toggle="yes">Eng. Appl. Artif. Intell.</italic>
</source>
                    <month>Oct.</month> <year>2022</year>;<volume>115</volume>:<fpage>105151</fpage>.
                    <pub-id pub-id-type="doi">10.1016/j.engappai.2022.105151</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref22">
                <label>24</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Kingma</surname>
                            <given-names>DP</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ba</surname>
                            <given-names>J</given-names>
                        </name>
</person-group>:
                    <article-title>Adam: A Method for Stochastic Optimization.</article-title>
                    <month>Jan.</month> <day>30</day>, <year>2017</year>. arXiv:1412.6980.
                    <pub-id pub-id-type="doi">10.48550/arXiv.1412.6980</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref23">
                <label>25</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Jadoon</surname>
                            <given-names>EK</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Khan</surname>
                            <given-names>FG</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Shah</surname>
                            <given-names>S</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Deep Learning-Based Multi-Modal Ensemble Classification Approach for Human Breast Cancer Prognosis.</article-title>
                    <source>

                        <italic toggle="yes">IEEE Access.</italic>
</source>
                    <year>2023</year>;<volume>11</volume>:<fpage>85760</fpage>&#x2013;<lpage>85769</lpage>.
                    <pub-id pub-id-type="doi">10.1109/ACCESS.2023.3304242</pub-id>
                </mixed-citation>
            </ref>
        </ref-list>
    </back>
    <sub-article article-type="reviewer-report" id="report474661">
        <front-stub>
            <article-id pub-id-type="doi">10.5256/f1000research.197690.r474661</article-id>
            <title-group>
                <article-title>Reviewer response for version 3</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author">
                    <name>
                        <surname>Zainudin</surname>
                        <given-names>Suhaila</given-names>
                    </name>
                    <xref ref-type="aff" rid="r474661a1">1</xref>
                    <role>Referee</role>
                    <uri content-type="orcid">https://orcid.org/0000-0003-2352-5312</uri>
                </contrib>
                <aff id="r474661a1">
                    <label>1</label>Universiti Kebangsaan Malaysia, Bangi, Selangor, Malaysia</aff>
            </contrib-group>
            <author-notes>
                <fn fn-type="conflict">
                    <p>
                        <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>27</day>
                <month>4</month>
                <year>2026</year>
            </pub-date>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2026 Zainudin S</copyright-statement>
                <copyright-year>2026</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access peer review report distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <related-article ext-link-type="doi" id="relatedArticleReport474661" related-article-type="peer-reviewed-article" xlink:href="10.12688/f1000research.166682.3"/>
            <custom-meta-group>
                <custom-meta>
                    <meta-name>recommendation</meta-name>
                    <meta-value>approve</meta-value>
                </custom-meta>
            </custom-meta-group>
        </front-stub>
        <body>
            <p>I have read the revised edition and have no further comments.</p>
            <p>Is the work clearly and accurately presented and does it cite the current literature?</p>
            <p>Partly</p>
            <p>If applicable, is the statistical analysis and its interpretation appropriate?</p>
            <p>Yes</p>
            <p>Are all the source data underlying the results available to ensure full reproducibility?</p>
            <p>Yes</p>
            <p>Is the study design appropriate and is the work technically sound?</p>
            <p>Yes</p>
            <p>Are the conclusions drawn adequately supported by the results?</p>
            <p>Yes</p>
            <p>Are sufficient details of methods and analysis provided to allow replication by others?</p>
            <p>Partly</p>
            <p>Reviewer Expertise:</p>
            <p>Data Analytics</p>
            <p>I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard.</p>
        </body>
    </sub-article>
    <sub-article article-type="reviewer-report" id="report476836">
        <front-stub>
            <article-id pub-id-type="doi">10.5256/f1000research.197690.r476836</article-id>
            <title-group>
                <article-title>Reviewer response for version 3</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author">
                    <name>
                        <surname>Arifin</surname>
                        <given-names>Toni</given-names>
                    </name>
                    <xref ref-type="aff" rid="r476836a1">1</xref>
                    <role>Referee</role>
                    <uri content-type="orcid">https://orcid.org/0009-0001-8049-1158</uri>
                </contrib>
                <aff id="r476836a1">
                    <label>1</label>Universitas Adhirajasa Reswara Sanjaya, Bandung, Indonesia</aff>
            </contrib-group>
            <author-notes>
                <fn fn-type="conflict">
                    <p>
                        <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>25</day>
                <month>4</month>
                <year>2026</year>
            </pub-date>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2026 Arifin T</copyright-statement>
                <copyright-year>2026</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access peer review report distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <related-article ext-link-type="doi" id="relatedArticleReport476836" related-article-type="peer-reviewed-article" xlink:href="10.12688/f1000research.166682.3"/>
            <custom-meta-group>
                <custom-meta>
                    <meta-name>recommendation</meta-name>
                    <meta-value>approve</meta-value>
                </custom-meta>
            </custom-meta-group>
        </front-stub>
        <body>
            <p>This technically robust paper demonstrates exceptional transparency following its third revision. By providing GitHub source code and documenting critical technical parameters&#x2014;including random seeds, hardware specifications, and specific software versions&#x2014;the authors ensure full reproducibility. This work offers a significant, verifiable contribution to the bioinformatics research community, effectively addressing previous concerns regarding methodological clarity and experimental replication standards.</p>
            <p>Is the work clearly and accurately presented and does it cite the current literature?</p>
            <p>Yes</p>
            <p>If applicable, is the statistical analysis and its interpretation appropriate?</p>
            <p>Yes</p>
            <p>Are all the source data underlying the results available to ensure full reproducibility?</p>
            <p>Yes</p>
            <p>Is the study design appropriate and is the work technically sound?</p>
            <p>Yes</p>
            <p>Are the conclusions drawn adequately supported by the results?</p>
            <p>Yes</p>
            <p>Are sufficient details of methods and analysis provided to allow replication by others?</p>
            <p>Yes</p>
            <p>Reviewer Expertise:</p>
            <p>Machine Learning, Image Processing, Deep Learning and Artificial Intelligence.</p>
            <p>I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard.</p>
        </body>
    </sub-article>
    <sub-article article-type="reviewer-report" id="report445784">
        <front-stub>
            <article-id pub-id-type="doi">10.5256/f1000research.187135.r445784</article-id>
            <title-group>
                <article-title>Reviewer response for version 2</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author">
                    <name>
                        <surname>Zainudin</surname>
                        <given-names>Suhaila</given-names>
                    </name>
                    <xref ref-type="aff" rid="r445784a1">1</xref>
                    <role>Referee</role>
                    <uri content-type="orcid">https://orcid.org/0000-0003-2352-5312</uri>
                </contrib>
                <aff id="r445784a1">
                    <label>1</label>Universiti Kebangsaan Malaysia, Bangi, Selangor, Malaysia</aff>
            </contrib-group>
            <author-notes>
                <fn fn-type="conflict">
                    <p>
                        <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>17</day>
                <month>2</month>
                <year>2026</year>
            </pub-date>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2026 Zainudin S</copyright-statement>
                <copyright-year>2026</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access peer review report distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <related-article ext-link-type="doi" id="relatedArticleReport445784" related-article-type="peer-reviewed-article" xlink:href="10.12688/f1000research.166682.2"/>
            <custom-meta-group>
                <custom-meta>
                    <meta-name>recommendation</meta-name>
                    <meta-value>approve-with-reservations</meta-value>
                </custom-meta>
            </custom-meta-group>
        </front-stub>
        <body>
            <p>This article presents a hybrid deep learning framework (BiLSTM+CNN with stacked Random Forest) for predicting breast cancer survival using multi-omics data from the METABRIC and TCGA datasets. The authors integrate clinical, copy number alteration, and gene expression features with MRMR feature selection and evaluate performance using cross-validation and ROC-based metrics. The study reports high predictive accuracy and demonstrates external validation across datasets, suggesting robustness and potential clinical relevance.</p>
            <p> </p>
            <p> The manuscript is clearly structured and generally well written. The background appropriately situates the work within recent literature on multi-omics survival modeling. However, claims regarding interpretability and clinical applicability would benefit from more explicit demonstrations (e.g., feature importance analysis linked to biological pathways). This is a 
                <italic>recommended improvement</italic>, not a requirement for scientific soundness.</p>
            <p> The use of two independent public datasets, cross-validation, and standard evaluation metrics constitutes an appropriate and technically sound design. External validation strengthens the conclusions. An ablation study isolating the contribution of each architectural component would further support claims of model superiority but is optional.</p>
            <p> </p>
            <p> While the authors provide architecture details and public code, several aspects limit full replication. To ensure scientific soundness, the authors 
                <bold>must</bold>: (1) specify exact preprocessing pipelines and parameter settings for MRMR and data augmentation; (2) document all random seeds and data splits; and (3) describe the computational environment (hardware/software versions). Providing a step-by-step reproducibility protocol or configuration file would resolve these issues.</p>
            <p> </p>
            <p> The evaluation metrics and cross-validation strategy are appropriate. Including formal statistical comparisons between models or calibration analyses would strengthen rigor but is not essential.</p>
            <p> </p>
            <p> Public datasets and open code support reproducibility. The conclusions are broadly supported by the results, though statements about clinical impact should be moderated or accompanied by discussion of deployment limitations.</p>
            <p> </p>
            <p> Overall, the article is scientifically promising. Addressing the required reproducibility clarifications will make it methodologically robust.</p>
            <p>Is the work clearly and accurately presented and does it cite the current literature?</p>
            <p>Partly</p>
            <p>If applicable, is the statistical analysis and its interpretation appropriate?</p>
            <p>Yes</p>
            <p>Are all the source data underlying the results available to ensure full reproducibility?</p>
            <p>Yes</p>
            <p>Is the study design appropriate and is the work technically sound?</p>
            <p>Yes</p>
            <p>Are the conclusions drawn adequately supported by the results?</p>
            <p>Yes</p>
            <p>Are sufficient details of methods and analysis provided to allow replication by others?</p>
            <p>Partly</p>
            <p>Reviewer Expertise:</p>
            <p>Data Analytics</p>
            <p>I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard, however I have significant reservations, as outlined above.</p>
        </body>
        <back>
            <ref-list>
                <title>References</title>
                <ref id="rep-ref-445784-1">
                    <label>1</label>
                    <mixed-citation publication-type="journal">
                        <person-group person-group-type="author"/>
                        <article-title>Metaheuristic approach for an enhanced mRMR filter method for classification using drug response microarray data</article-title>.
                        <source>
                            <italic>Expert Systems with Applications</italic>
                        </source>.<year>2017</year>;<volume>90</volume>:
                        <fpage>224</fpage>-<lpage>231</lpage>
                        <pub-id pub-id-type="doi">10.1016/j.eswa.2017.08.026</pub-id>
                    </mixed-citation>
                </ref>
            </ref-list>
        </back>
        <sub-article article-type="response" id="comment15660-445784">
            <front-stub>
                <contrib-group>
                    <contrib contrib-type="author">
                        <name>
                            <surname>Mohamad</surname>
                            <given-names>Mohd Saberi</given-names>
                        </name>
                        <aff>Genetics and Genomics, United Arab Emirates University, Al Ain, Abu Dhabi, United Arab Emirates</aff>
                    </contrib>
                </contrib-group>
                <author-notes>
                    <fn fn-type="conflict">
                        <p>
                            <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                    </fn>
                </author-notes>
                <pub-date pub-type="epub">
                    <day>12</day>
                    <month>3</month>
                    <year>2026</year>
                </pub-date>
            </front-stub>
            <body>
                <p>Comment 1:</p>
                <p> However, claims regarding interpretability and clinical applicability would benefit from more explicit demonstrations (e.g., feature importance analysis linked to biological pathways). This is a&#x00a0;
                    <italic>recommended improvement</italic>, not a requirement for scientific soundness.</p>
                <p> Amendment 1:</p>
                <p> This part has been improved and revised to clarify the scope and limitations of the study. We explicitly acknowledged that biological interpretation of selected features and their association with breast cancer pathways was not explored, and we suggested future directions such as pathway enrichment or gene importance analyses to enhance interpretability. Additionally, we have highlighted key considerations and limitations regarding clinical applicability, including dataset constraints, preprocessing dependencies, model calibration, and the need for prospective validation, regulatory approval, and clinician evaluation.</p>
                <p> Under discussion section (paragraphs 4 and 5)</p>
                <p> </p>
                <p> </p>
                <p> Comment 2:</p>
                <p> While the authors provide architecture details and public code, several aspects limit full replication. To ensure scientific soundness, the authors&#x00a0;must:</p>
                <p> (1) specify exact preprocessing pipelines and parameter settings for MRMR and data augmentation;</p>
                <p> Amendment 2:</p>
                <p> This part has been improved and revised to ensure full reproducibility. We added a detailed flowchart of the preprocessing pipeline and explicitly described all preprocessing steps and parameter settings.</p>
                <p> Under preprocessing data section. Added figure 1 as the flowchart of the preprocessing.</p>
                <p> Explanation in paragraphs 3 and 4.</p>
                <p> </p>
                <p> Comment 3:</p>
                <p> (2) document all random seeds and data splits;</p>
                <p> Amendment 3:</p>
                <p> All random seeds and data splits have been included.</p>
                <p> Under data augmentation section (last paragraph)</p>
                <p> </p>
                <p> Comment 4:</p>
                <p> (3) describe the computational environment (hardware/software versions).</p>
                <p> Amendment 4:</p>
                <p> Computational environment has been included.</p>
                <p> Under computational environment section.</p>
                <p> </p>
                <p> Comment 5:</p>
                <p> Public datasets and open code support reproducibility. The conclusions are broadly supported by the results, though statements about clinical impact should be moderated or accompanied by discussion of deployment limitations.</p>
                <p> Amendment 5:</p>
                <p> The explanation has been included, as detailed in point number 1.</p>
            </body>
        </sub-article>
        <sub-article article-type="response" id="comment16043-445784">
            <front-stub>
                <contrib-group>
                    <contrib contrib-type="author">
                        <name>
                            <surname>Zainudin</surname>
                            <given-names>Suhaila</given-names>
                        </name>
                    </contrib>
                </contrib-group>
                <author-notes>
                    <fn fn-type="conflict">
                        <p>
                            <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                    </fn>
                </author-notes>
                <pub-date pub-type="epub">
                    <day>26</day>
                    <month>4</month>
                    <year>2026</year>
                </pub-date>
            </front-stub>
            <body>
                <p>The revisions are accepted.</p>
            </body>
        </sub-article>
    </sub-article>
    <sub-article article-type="reviewer-report" id="report410019">
        <front-stub>
            <article-id pub-id-type="doi">10.5256/f1000research.187135.r410019</article-id>
            <title-group>
                <article-title>Reviewer response for version 2</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author">
                    <name>
                        <surname>Chan</surname>
                        <given-names>Weng Howe</given-names>
                    </name>
                    <xref ref-type="aff" rid="r410019a1">1</xref>
                    <role>Referee</role>
                    <uri content-type="orcid">https://orcid.org/0000-0003-0612-3661</uri>
                </contrib>
                <aff id="r410019a1">
                    <label>1</label>Universiti Teknologi Malaysia, Johor Bahru, Malaysia</aff>
            </contrib-group>
            <author-notes>
                <fn fn-type="conflict">
                    <p>
                        <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>5</day>
                <month>9</month>
                <year>2025</year>
            </pub-date>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2025 Chan WH</copyright-statement>
                <copyright-year>2025</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access peer review report distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <related-article ext-link-type="doi" id="relatedArticleReport410019" related-article-type="peer-reviewed-article" xlink:href="10.12688/f1000research.166682.2"/>
            <custom-meta-group>
                <custom-meta>
                    <meta-name>recommendation</meta-name>
                    <meta-value>approve</meta-value>
                </custom-meta>
            </custom-meta-group>
        </front-stub>
        <body>
            <p>Authors have addressed all the comments, especially regarding data augmentations and the MRMR feature selection process. No further comments.</p>
            <p>Is the work clearly and accurately presented and does it cite the current literature?</p>
            <p>Yes</p>
            <p>If applicable, is the statistical analysis and its interpretation appropriate?</p>
            <p>I cannot comment. A qualified statistician is required.</p>
            <p>Are all the source data underlying the results available to ensure full reproducibility?</p>
            <p>Yes</p>
            <p>Is the study design appropriate and is the work technically sound?</p>
            <p>Yes</p>
            <p>Are the conclusions drawn adequately supported by the results?</p>
            <p>Yes</p>
            <p>Are sufficient details of methods and analysis provided to allow replication by others?</p>
            <p>Partly</p>
            <p>Reviewer Expertise:</p>
            <p>bioinformatics, computational biology, artificial intelligence</p>
            <p>I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard.</p>
        </body>
    </sub-article>
    <sub-article article-type="reviewer-report" id="report402954">
        <front-stub>
            <article-id pub-id-type="doi">10.5256/f1000research.183707.r402954</article-id>
            <title-group>
                <article-title>Reviewer response for version 1</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author">
                    <name>
                        <surname>Chan</surname>
                        <given-names>Weng Howe</given-names>
                    </name>
                    <xref ref-type="aff" rid="r402954a1">1</xref>
                    <role>Referee</role>
                    <uri content-type="orcid">https://orcid.org/0000-0003-0612-3661</uri>
                </contrib>
                <aff id="r402954a1">
                    <label>1</label>Universiti Teknologi Malaysia, Johor Bahru, Malaysia</aff>
            </contrib-group>
            <author-notes>
                <fn fn-type="conflict">
                    <p>
                        <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>12</day>
                <month>8</month>
                <year>2025</year>
            </pub-date>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2025 Chan WH</copyright-statement>
                <copyright-year>2025</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access peer review report distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <related-article ext-link-type="doi" id="relatedArticleReport402954" related-article-type="peer-reviewed-article" xlink:href="10.12688/f1000research.166682.1"/>
            <custom-meta-group>
                <custom-meta>
                    <meta-name>recommendation</meta-name>
                    <meta-value>approve-with-reservations</meta-value>
                </custom-meta>
            </custom-meta-group>
        </front-stub>
        <body>
            <p>- In section A, it is mentioned that there are 25 clinical features of METABRIC after removing the two with missing values and redundancy. Are all 25 remaining features examined with the t-test, even though only some of them are shown in Table 2? This should be clarified and justified.</p>
            <p> - Regarding data augmentation, the authors should justify how the DA techniques are relevant in this case with gene expression data, as this is not image data. How would random rotation and noise injection make sense here? The included references 15 and 16 do not seem to reflect the existing work on this. Although the future work section of ref 15 does mention data augmentation, the support for the use of DA here is very minimal. This should be justified properly.</p>
            <p> - In this study, MRMR is used for feature selection. The authors should explain in more detail how MRMR, especially its parameter tuning, is carried out, including at least the key settings of MRMR after hyperparameter tuning and how these parameters would affect the outcome. This should be clarified, as the rest of the learning is actually based on the output features of this feature selection process.</p>
            <p>Is the work clearly and accurately presented and does it cite the current literature?</p>
            <p>Yes</p>
            <p>If applicable, is the statistical analysis and its interpretation appropriate?</p>
            <p>I cannot comment. A qualified statistician is required.</p>
            <p>Are all the source data underlying the results available to ensure full reproducibility?</p>
            <p>Yes</p>
            <p>Is the study design appropriate and is the work technically sound?</p>
            <p>Yes</p>
            <p>Are the conclusions drawn adequately supported by the results?</p>
            <p>Yes</p>
            <p>Are sufficient details of methods and analysis provided to allow replication by others?</p>
            <p>Partly</p>
            <p>Reviewer Expertise:</p>
            <p>bioinformatics, computational biology, artificial intelligence</p>
            <p>I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard, however I have significant reservations, as outlined above.</p>
        </body>
        <sub-article article-type="response" id="comment14414-402954">
            <front-stub>
                <contrib-group>
                    <contrib contrib-type="author">
                        <name>
                            <surname>Mohamad</surname>
                            <given-names>Mohd Saberi</given-names>
                        </name>
                        <aff>Genetics and Genomics, United Arab Emirates University, Al Ain, Abu Dhabi, United Arab Emirates</aff>
                    </contrib>
                </contrib-group>
                <author-notes>
                    <fn fn-type="conflict">
                        <p>
                            <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                    </fn>
                </author-notes>
                <pub-date pub-type="epub">
                    <day>23</day>
                    <month>8</month>
                    <year>2025</year>
                </pub-date>
            </front-stub>
            <body>
                <p>1. In section A, it is mentioned that there are 25 clinical features of METABRIC after removing the two with missing values and redundancy. Are all 25 remaining features examined with the t-test, even though only some of them are shown in Table 2? This should be clarified and justified.</p>
                <p> </p>
                <p> 
                    <underline>This point has been clarified and justified on page 5.</underline>
                </p>
                <p> </p>
                <p> 2. Regarding data augmentation, the authors should justify how the DA techniques are relevant in this case with gene expression data, as this is not image data. How would random rotation and noise injection make sense here? The included references 15 and 16 do not seem to reflect the existing work on this. Although the future work section of ref 15 does mention data augmentation, the support for the use of DA here is very minimal. This should be justified properly.</p>
                <p> This point has been clarified on pages 7 and 8.</p>
                <p> </p>
                <p> 
                    <underline>The justification for applying data augmentation techniques to gene expression data, including the relevance of random rotation and noise injection in this context, has been provided.</underline>
                </p>
                <p> </p>
                <p> 3. In this study, MRMR is used for feature selection. The authors should explain in more detail how MRMR, especially its parameter tuning, is carried out, including at least the key settings of MRMR after hyperparameter tuning and how these parameters would affect the outcome. This should be clarified, as the rest of the learning is actually based on the output features of this feature selection process.</p>
                <p> </p>
                <p> 
                    <underline>Further explanation has been added regarding the use of MRMR on pages 8 and 9.</underline>
                </p>
            </body>
        </sub-article>
        <sub-article article-type="response" id="comment14478-402954">
            <front-stub>
                <contrib-group>
                    <contrib contrib-type="author">
                        <name>
                            <surname>Mohamad</surname>
                            <given-names>Mohd Saberi</given-names>
                        </name>
                        <aff>Genetics and Genomics, United Arab Emirates University, Al Ain, Abu Dhabi, United Arab Emirates</aff>
                    </contrib>
                </contrib-group>
                <author-notes>
                    <fn fn-type="conflict">
                        <p>
                            <bold>Competing interests: </bold>The authors declare that they have no competing interests.</p>
                    </fn>
                </author-notes>
                <pub-date pub-type="epub">
                    <day>31</day>
                    <month>8</month>
                    <year>2025</year>
                </pub-date>
            </front-stub>
            <body>
                <p>Thank you for taking the time to read our article, 
                    <italic>&#x201c;An Improved Deep Learning Algorithm for Breast Cancer Survival Prediction Based on Multi-Omics Data.&#x201d;</italic> In this work, we aimed to develop an interpretable and effective deep learning framework that integrates BiLSTM, CNN, and MRMR feature selection for breast cancer survival prediction.</p>
                <p> We hope that the findings, particularly the accuracy and robust performance across the METABRIC and TCGA datasets, will contribute to ongoing efforts in improving personalised treatment planning and clinical decision-making. We welcome constructive feedback, discussions, and collaborations that can further enhance the clinical translation of this approach.</p>
            </body>
        </sub-article>
    </sub-article>
</article>
