<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.2 20190208//EN" "http://jats.nlm.nih.gov/publishing/1.2/JATS-journalpublishing1.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="1.2" xml:lang="en">
    <front>
        <journal-meta>
            <journal-id journal-id-type="pmc">F1000Research</journal-id>
            <journal-title-group>
                <journal-title>F1000Research</journal-title>
            </journal-title-group>
            <issn pub-type="epub">2046-1402</issn>
            <publisher>
                <publisher-name>F1000 Research Limited</publisher-name>
                <publisher-loc>London, UK</publisher-loc>
            </publisher>
        </journal-meta>
        <article-meta>
            <article-id pub-id-type="doi">10.12688/f1000research.166350.1</article-id>
            <article-categories>
                <subj-group subj-group-type="heading">
                    <subject>Research Article</subject>
                </subj-group>
                <subj-group>
                    <subject>Articles</subject>
                </subj-group>
            </article-categories>
            <title-group>
                <article-title>A Hybrid Anomaly Detection Framework Combining Supervised and Unsupervised Learning for Credit Card Fraud Detection</article-title>
                <fn-group content-type="pub-status">
                    <fn>
                        <p>[version 1; peer review: 1 approved with reservations, 1 not approved]</p>
                    </fn>
                </fn-group>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author" corresp="yes">
                    <name>
                        <surname>Shanaa</surname>
                        <given-names>Mohammad</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Conceptualization</role>
                    <role content-type="http://credit.niso.org/">Data Curation</role>
                    <role content-type="http://credit.niso.org/">Formal Analysis</role>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Software</role>
                    <role content-type="http://credit.niso.org/">Visualization</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Original Draft Preparation</role>
                    <uri content-type="orcid">https://orcid.org/0000-0001-9787-1408</uri>
                    <xref ref-type="corresp" rid="c1">a</xref>
                    <xref ref-type="aff" rid="a1">1</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Abdallah</surname>
                        <given-names>Sherief</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Supervision</role>
                    <role content-type="http://credit.niso.org/">Validation</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a1">1</xref>
                </contrib>
                <aff id="a1">
                    <label>1</label>Faculty of Engineering and IT, The British University in Dubai, Dubai, Dubai, United Arab Emirates</aff>
            </contrib-group>
            <author-notes>
                <corresp id="c1">
                    <label>a</label>
                    <email xlink:href="mailto:mohammadsshanaa@gmail.com">mohammadsshanaa@gmail.com</email>
                </corresp>
                <fn fn-type="conflict">
                    <p>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>7</day>
                <month>7</month>
                <year>2025</year>
            </pub-date>
            <pub-date pub-type="collection">
                <year>2025</year>
            </pub-date>
            <volume>14</volume>
            <elocation-id>664</elocation-id>
            <history>
                <date date-type="accepted">
                    <day>26</day>
                    <month>6</month>
                    <year>2025</year>
                </date>
            </history>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2025 Shanaa M and Abdallah S</copyright-statement>
                <copyright-year>2025</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access article distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <self-uri content-type="pdf" xlink:href="https://f1000research.com/articles/14-664/pdf"/>
            <abstract>
                <sec>
                    <title>Background</title>
                    <p>Credit card fraud detection remains a major challenge because of the highly imbalanced nature of transaction data. Conventional supervised models often suffer from low recall or high false positive rates, whereas unsupervised methods lack precision.</p>
                </sec>
                <sec>
                    <title>Methods</title>
                    <p>In this study, we propose a hybrid anomaly detection framework that combines an unsupervised autoencoder trained on normal transactions to capture reconstruction error patterns with a supervised XGBoost classifier trained on the same dataset. The hybrid system integrates both scores via an optimized thresholding mechanism to balance sensitivity and specificity. We evaluated the model on the publicly available Kaggle creditcard.csv dataset comprising 284,807 transactions, with only 492 labelled fraudulent.</p>
                </sec>
                <sec>
                    <title>Results</title>
                    <p>The proposed model achieved superior performance, with a recall of 0.9250, precision of 0.9569, F1-score of 0.9407, and Matthews Correlation Coefficient (MCC) of 0.9407, with an accuracy of 0.9998, surpassing the results of similar published models using the same dataset.</p>
                </sec>
                <sec>
                    <title>Conclusions</title>
                    <p>This framework provides a practical, reproducible, high-performance solution for detecting financial fraud. The code, model configuration, and data-processing pipeline were made available to support transparency and future research.</p>
                </sec>
            </abstract>
            <kwd-group kwd-group-type="author">
                <kwd>Fraud Detection</kwd>
                <kwd>Autoencoder</kwd>
                <kwd>Isolation Forest</kwd>
                <kwd>XGBoost</kwd>
                <kwd>Random Forest</kwd>
                <kwd>Hybrid Model</kwd>
                <kwd>Anomaly Detection</kwd>
                <kwd>Imbalanced Dataset</kwd>
            </kwd-group>
            <funding-group>
                <funding-statement>The author(s) declared that no grants were involved in supporting this work.</funding-statement>
            </funding-group>
        </article-meta>
    </front>
    <body>
        <sec id="sec5" sec-type="intro">
            <title>Introduction</title>
            <p>Credit card fraud remains one of the most persistent and damaging threats to the digital financial ecosystem. As the volume of online transactions continues to grow, so too does the complexity of fraudulent activities. Global losses are projected to exceed $40 billion annually by 2025, driven by the increasing digitalization of financial services and the constant evolution of fraud tactics.
                <sup>
                    <xref ref-type="bibr" rid="ref1">1</xref>
                </sup> The core challenge in this domain lies in accurately detecting fraudulent transactions that are rare (less than 1%), adaptive, and often indistinguishable from legitimate user behavior. This imbalance (between legitimate and fraudulent transactions) significantly impairs the performance of both conventional and machine learning-based detection systems, often leading to biased predictions and poor generalizability across datasets.
                <sup>
                    <xref ref-type="bibr" rid="ref2">2</xref>,
                    <xref ref-type="bibr" rid="ref3">3</xref>
                </sup> Traditional fraud detection methods struggle to scale effectively in such dynamic and imbalanced environments, frequently resulting in missed fraud cases or excessive false positives. Detection systems frequently encounter difficulties in balancing sensitivity and specificity; enhancing fraud detection (true positives) often leads to an increase in false positives, thereby disrupting the customer experience and straining resources. Conversely, conservative models may fail to identify fraudulent activities, leading to financial losses and reputational harm.</p>
            <p>Recent research has highlighted the potential of hybrid models that combine supervised classification techniques with unsupervised anomaly detection to enhance both the precision and robustness of fraud detection. For instance, studies integrating techniques, such as autoencoders, isolation-based methods, and gradient boosting classifiers, have demonstrated improved performance in identifying complex and evolving fraud patterns.
                <sup>
                    <xref ref-type="bibr" rid="ref4">4</xref>
                </sup> However, many of these models still lack generalizability or require substantial computational resources, which limits their practical application in real-time financial environments.</p>
            <p>The aim of this study is to develop and evaluate a hybrid anomaly detection framework that integrates both supervised and unsupervised learning techniques to improve the accuracy, robustness, and generalizability of credit card fraud-detection systems. This study specifically targets the challenges posed by imbalanced data, evolving fraud patterns, and limitations of single-model detection strategies.</p>
            <p>Our approach is empirically validated using the publicly available European credit card fraud dataset, which presents realistic challenges including severe class imbalance. We conducted comprehensive experiments to measure the performance of the model across standard evaluation metrics and benchmarked its results against state-of-the-art techniques. Using this approach, this study aims to demonstrate the practical value and academic contribution of hybrid learning models in improving credit card fraud detection.</p>
            <p>

                <bold>This study makes the following contributions:</bold>

                <list list-type="order">
                    <list-item>
                        <label>1.</label>
                        <p>A novel hybrid anomaly detection framework that integrates supervised (XGBoost, Random Forest) and unsupervised (Autoencoder, Isolation Forest) models is proposed to address the challenges of data imbalance and concept drift in credit card fraud detection.</p>
                    </list-item>
                    <list-item>
                        <label>2.</label>
                        <p>Comparative analysis of the hybrid model against state-of-the-art models using the publicly available and widely adopted Kaggle creditcard.csv dataset.</p>
                    </list-item>
                    <list-item>
                        <label>3.</label>
                        <p>A reproducible pipeline suitable for adaptation in real-world applications that balances detection accuracy with computational efficiency.</p>
                    </list-item>
                </list>
            </p>
        </sec>
        <sec id="sec6">
            <title>Related work</title>
            <p>Credit card fraud detection has become increasingly critical with the rapid expansion of online transactions and growing sophistication of fraudulent activities. Contemporary trends underscore the adoption of advanced machine learning (ML) techniques, which have shown considerable promise in enhancing both the accuracy and efficiency of fraud-detection systems. Nevertheless, these advancements have introduced several challenges, particularly the limitations of traditional anomaly detection methods and the constraints inherent in current ML-based models.</p>
            <p>Traditional approaches to anomaly detection, including rule-based systems and statistical models, have long served as the foundation for fraud detection. However, these techniques frequently struggle to address the dynamic and adaptive nature of fraudulent behavior, which often mimics legitimate transaction patterns. Consequently, they tend to exhibit high false positive rates.
                <sup>
                    <xref ref-type="bibr" rid="ref5">5</xref>,
                    <xref ref-type="bibr" rid="ref6">6</xref>
                </sup> Moreover, such approaches generally fail to scale effectively with the vast and continuously growing volume of transaction data, rendering them less viable in real-time fraud detection scenarios.
                <sup>
                    <xref ref-type="bibr" rid="ref7">7</xref>,
                    <xref ref-type="bibr" rid="ref8">8</xref>
                </sup> Consequently, there has been an increasing shift toward machine learning models that are better equipped to manage large datasets and adapt to evolving fraud strategies.
                <sup>
                    <xref ref-type="bibr" rid="ref9">9</xref>,
                    <xref ref-type="bibr" rid="ref10">10</xref>
                </sup>
            </p>
            <p>Despite their advantages, the existing ML models are not without limitations. A primary concern is the class imbalance inherent in credit card transaction datasets, where legitimate transactions overwhelmingly outnumber fraudulent transactions. This imbalance often leads to skewed model performance, resulting in a high rate of false negatives in which fraudulent transactions remain undetected.
                <sup>
                    <xref ref-type="bibr" rid="ref2">2</xref>,
                    <xref ref-type="bibr" rid="ref3">3</xref>
                </sup> Additionally, many ML models demand extensive feature engineering and frequently struggle to generalize across datasets because of variations in consumer behavior and transaction patterns.
                <sup>
                    <xref ref-type="bibr" rid="ref11">11</xref>,
                    <xref ref-type="bibr" rid="ref12">12</xref>
                </sup> The scarcity of accurately labelled fraudulent transactions further complicates the training process, as acquiring such labels is challenging in real-world settings.
                <sup>
                    <xref ref-type="bibr" rid="ref13">13</xref>
                </sup>
            </p>
            <p>Hybrid approaches have emerged as promising solutions for mitigating these issues. By combining different methodologies, researchers have been able to enhance the detection accuracy and reduce false positives.
                <sup>
                    <xref ref-type="bibr" rid="ref14">14</xref>,
                    <xref ref-type="bibr" rid="ref15">15</xref>
                </sup> For example, hybrid models that integrate convolutional neural networks with support vector machines have demonstrated improved performance in identifying anomalies in financial datasets.
                <sup>
                    <xref ref-type="bibr" rid="ref15">15</xref>
                </sup> These methods exploit the strengths of diverse algorithms and contribute to more robust and generalizable detection capabilities. Moreover, similar hybrid strategies have shown effectiveness in other domains facing anomaly detection challenges, including healthcare and cybersecurity.
                <sup>
                    <xref ref-type="bibr" rid="ref14">14</xref>
                </sup>
            </p>
            <p>In the context of fraud detection research, several benchmark datasets are frequently used, notably the European Credit Card Transactions dataset and the Kaggle Credit Card Fraud Detection dataset. These datasets are distinguished by their high dimensionality and extreme class imbalance, with fraudulent instances often comprising less than 1% of the total records.
                <sup>
                    <xref ref-type="bibr" rid="ref2">2</xref>,
                    <xref ref-type="bibr" rid="ref3">3</xref>
                </sup> In particular, the European dataset includes anonymized transaction features derived from Principal Component Analysis (PCA) to ensure user privacy, making it suitable for academic use.
                <sup>
                    <xref ref-type="bibr" rid="ref12">12</xref>,
                    <xref ref-type="bibr" rid="ref16">16</xref>
                </sup> Such datasets are instrumental in training and evaluating fraud-detection models because they closely reflect the complexities encountered in real-world applications.</p>
            <p>In summary, although traditional anomaly detection techniques have laid the foundational framework for credit card fraud detection, the adoption of machine learning and hybrid methodologies opens new possibilities for improving the detection efficacy. Nonetheless, persistent challenges necessitate ongoing research in this field. The advancement of more sophisticated hybrid models and the utilization of comprehensive real-world datasets will be essential to overcome these hurdles and further progress in this critical area.</p>
            <p>In the domain of credit card fraud detection, unsupervised learning methods have garnered increasing attention owing to their capacity to identify anomalies without relying on labelled data. Among these, clustering algorithms such as DBSCAN and HDBSCAN have demonstrated considerable potential. For instance, one study
                <sup>
                    <xref ref-type="bibr" rid="ref1">1</xref>
                </sup> reported that combining HDBSCAN with UMAP and SMOTE enables the identification of previously unseen fraud patterns, while significantly reducing false positives. Similarly, deep-learning-based anomaly detection frameworks, such as the attentional anomaly detection network,
                <sup>
                    <xref ref-type="bibr" rid="ref16">16</xref>
                </sup> show promise for capturing behavioral transaction anomalies without the need for predefined class labels. These approaches are particularly advantageous in real-world contexts where labelled fraudulent data are limited, allowing the detection of novel fraud patterns that traditional supervised models may overlook.
                <sup>
                    <xref ref-type="bibr" rid="ref17">17</xref>
                </sup>
            </p>
            <p>Conversely, supervised learning techniques, particularly gradient boosting methods, such as XGBoost, have been widely adopted owing to their robustness and interpretability. One study
                <sup>
                    <xref ref-type="bibr" rid="ref2">2</xref>
                </sup> highlighted the effectiveness of XGBoost when paired with data augmentation strategies, such as SMOTE ENN, achieving high accuracy with low false-positive rates. Further evidence from
                <sup>
                    <xref ref-type="bibr" rid="ref18">18</xref>
                </sup> demonstrated that integrating XGBoost with resampling methods enhanced the overall performance across a range of machine learning models. Notably, the inherent capability of XGBoost to handle imbalanced datasets makes it particularly well-suited for credit card fraud detection, where fraudulent transactions comprise only a small fraction of the total dataset.
                <sup>
                    <xref ref-type="bibr" rid="ref10">10</xref>
                </sup>
            </p>
            <p>Hybrid approaches integrating supervised and unsupervised learning have emerged as promising strategies. For example, one study
                <sup>
                    <xref ref-type="bibr" rid="ref14">14</xref>
                </sup> presented a deep learning model combined with SMOTE oversampling, which effectively addressed the class imbalance issue while improving the detection accuracy. Similarly,
                <sup>
                    <xref ref-type="bibr" rid="ref19">19</xref>
                </sup> illustrated the benefits of combining neural networks with traditional machine learning techniques to enhance the overall detection efficacy. These hybrid models exploit the complementary strengths of each learning paradigm, thereby resulting in adaptive and accurate systems.</p>
            <p>Despite these advancements, several persistent challenges continue to hinder optimal fraud detection performance. A primary issue is class imbalance, wherein the overwhelming dominance of legitimate transactions can bias models and reduce their sensitivity to fraudulent instances.
                <sup>
                    <xref ref-type="bibr" rid="ref11">11</xref>
                </sup> Additionally, the constantly evolving tactics of fraudsters necessitate frequent model retraining and updates, which can be both computationally and operationally demanding.
                <sup>
                    <xref ref-type="bibr" rid="ref11">11</xref>
                </sup> Scalability is also a concern, as many models exhibit performance degradation when deployed in large-scale or real-time transaction streams.
                <sup>
                    <xref ref-type="bibr" rid="ref20">20</xref>
                </sup>
            </p>
            <p>The performance metrics across existing models vary significantly in terms of scalability, accuracy, and operational efficiency. Research indicates that ensemble techniques that combine multiple classifiers tend to outperform individual models in terms of their robustness and accuracy.
                <sup>
                    <xref ref-type="bibr" rid="ref21">21</xref>
                </sup> However, the increased computational requirements of ensemble models may limit their applicability in time-sensitive scenarios.
                <sup>
                    <xref ref-type="bibr" rid="ref20">20</xref>
                </sup> In contrast, XGBoost has often been identified as a suitable compromise, offering a favorable balance between predictive performance and computational efficiency, which makes it attractive for real-world fraud detection systems.
                <sup>
                    <xref ref-type="bibr" rid="ref2">2</xref>,
                    <xref ref-type="bibr" rid="ref22">22</xref>
                </sup>
            </p>
            <p>Research into hybrid anomaly detection models typically seeks to fulfil several key objectives, including enhancing detection accuracy, improving robustness against emerging fraud patterns, and integrating both supervised and unsupervised learning techniques to capitalize on the strengths of each approach. Hybrid models are particularly advantageous in scenarios where labelled data are limited because they enable the use of unsupervised methods to identify anomalies, whereas supervised models refine and validate these detections.
                <sup>
                    <xref ref-type="bibr" rid="ref23">23</xref>&#x2013;
                    <xref ref-type="bibr" rid="ref25">25</xref>
                </sup> For example, integrating supervised models that learn from historical transaction data with unsupervised models capable of detecting novel anomalies facilitates a more comprehensive detection framework, addressing the limitations of methods that rely solely on a single-learning paradigm.
                <sup>
                    <xref ref-type="bibr" rid="ref23">23</xref>,
                    <xref ref-type="bibr" rid="ref24">24</xref>
                </sup>
            </p>
            <p>The literature highlights notable gaps in existing anomaly detection frameworks, particularly their limited adaptability to evolving fraud patterns and poor generalizability across diverse datasets. Hybrid models offer a promising solution to these issues by leveraging various data sources and learning strategies, thereby increasing their effectiveness in real-world deployment.
                <sup>
                    <xref ref-type="bibr" rid="ref26">26</xref>,
                    <xref ref-type="bibr" rid="ref27">27</xref>
                </sup> For instance, studies incorporating Generative Adversarial Networks (GANs) into traditional machine learning workflows have demonstrated improved detection of complex fraud patterns that may elude conventional models.
                <sup>
                    <xref ref-type="bibr" rid="ref4">4</xref>
                </sup> Moreover, the flexibility of hybrid models supports continuous learning and adaptation, which are essential in the constantly evolving fraud landscape.
                <sup>
                    <xref ref-type="bibr" rid="ref23">23</xref>,
                    <xref ref-type="bibr" rid="ref24">24</xref>
                </sup>
            </p>
            <p>Success in fraud detection research is typically measured using performance metrics such as accuracy, precision, recall, and F1-score, which collectively evaluate a model&#x2019;s capability to correctly identify fraudulent transactions while maintaining operational efficiency.
                <sup>
                    <xref ref-type="bibr" rid="ref28">28</xref>,
                    <xref ref-type="bibr" rid="ref29">29</xref>
                </sup> Minimizing false positives and effectively identifying previously unseen fraud cases are also critical indicators of success.
                <sup>
                    <xref ref-type="bibr" rid="ref23">23</xref>,
                    <xref ref-type="bibr" rid="ref24">24</xref>
                </sup> Models that strike a balance between high accuracy and low false positive rates are particularly valued, as they reduce the burden of manual transaction reviews and minimize disruption to legitimate users.
                <sup>
                    <xref ref-type="bibr" rid="ref23">23</xref>,
                    <xref ref-type="bibr" rid="ref24">24</xref>,
                    <xref ref-type="bibr" rid="ref29">29</xref>
                </sup>
            </p>
            <p>Both supervised and unsupervised learning play an integral role in addressing the research challenges in fraud detection. Supervised learning is particularly effective when sufficient labelled data are available, enabling the model to learn the distinctions between fraudulent and non-fraudulent transactions.
                <sup>
                    <xref ref-type="bibr" rid="ref30">30</xref>,
                    <xref ref-type="bibr" rid="ref31">31</xref>
                </sup> By contrast, unsupervised learning excels in scenarios where labels are unavailable, identifying novel or emerging fraud patterns without prior examples.
                <sup>
                    <xref ref-type="bibr" rid="ref23">23</xref>&#x2013;
                    <xref ref-type="bibr" rid="ref25">25</xref>
                </sup> The integration of both techniques enhances not only the model&#x2019;s detection capacity but also the interpretability and adaptability of the fraud detection framework, as evidenced by research that underscores their complementary nature.
                <sup>
                    <xref ref-type="bibr" rid="ref24">24</xref>,
                    <xref ref-type="bibr" rid="ref25">25</xref>
                </sup>
            </p>
            <p>In the literature, &#x201c;success&#x201d; in fraud detection is frequently defined in terms of balancing detection performance with operational efficiency. This includes the ability to accurately detect fraudulent transactions with minimal false positives, thereby ensuring that genuine users are not adversely affected.
                <sup>
                    <xref ref-type="bibr" rid="ref23">23</xref>&#x2013;
                    <xref ref-type="bibr" rid="ref25">25</xref>
                </sup> Furthermore, a model&#x2019;s adaptability to new fraud typologies and its performance across various datasets are equally important for assessing its practical applicability and overall robustness.
                <sup>
                    <xref ref-type="bibr" rid="ref23">23</xref>&#x2013;
                    <xref ref-type="bibr" rid="ref25">25</xref>
                </sup>
            </p>
        </sec>
        <sec id="sec7">
            <title>Unsupervised methods</title>
            <sec id="sec8">
                <title>Autoencoders</title>
                <p>Autoencoders have emerged as powerful tools for feature extraction in anomaly detection, particularly in fraud detection. By leveraging their ability to learn compressed representations of data, autoencoders can effectively identify anomalies by reconstructing the input data and measuring the reconstruction error. This process allows for the extraction of relevant features that distinguish normal data from anomalies, as the model learns to ignore noise and irrelevant information during training.
                    <sup>
                        <xref ref-type="bibr" rid="ref32">32</xref>&#x2013;
                        <xref ref-type="bibr" rid="ref34">34</xref>
                    </sup> The architecture of autoencoders, which typically consists of an encoder and decoder, facilitates dimensionality reduction, making them suitable for high-dimensional datasets often encountered in fraud detection scenarios.
                    <sup>
                        <xref ref-type="bibr" rid="ref35">35</xref>,
                        <xref ref-type="bibr" rid="ref36">36</xref>
                    </sup>
                </p>
                <p>Despite their advantages, autoencoders have limitations when applied to unsupervised-learning tasks. A significant challenge is determining an appropriate reconstruction error threshold, which is crucial for distinguishing between normal and anomalous instances. This threshold can be influenced by the distribution of reconstruction errors, and improper selection may lead to high false positive rates or missed detections.
                    <sup>
                        <xref ref-type="bibr" rid="ref33">33</xref>,
                        <xref ref-type="bibr" rid="ref37">37</xref>,
                        <xref ref-type="bibr" rid="ref38">38</xref>
                    </sup> Moreover, autoencoders can struggle with class imbalances because they are typically trained on predominantly normal data, making it difficult to generalize to rare fraudulent instances.
                    <sup>
                        <xref ref-type="bibr" rid="ref37">37</xref>,
                        <xref ref-type="bibr" rid="ref39">39</xref>
                    </sup> Additionally, the complexity of the model can lead to overfitting, particularly when the training dataset is small or lacks diversity.
                    <sup>
                        <xref ref-type="bibr" rid="ref40">40</xref>,
                        <xref ref-type="bibr" rid="ref41">41</xref>
                    </sup>
                </p>
                <p>When comparing autoencoders to other unsupervised methods in fraud detection, such as clustering and traditional statistical methods, autoencoders often demonstrate superior performance because of their ability to learn complex, non-linear relationships in the data.
                    <sup>
                        <xref ref-type="bibr" rid="ref35">35</xref>,
                        <xref ref-type="bibr" rid="ref39">39</xref>,
                        <xref ref-type="bibr" rid="ref42">42</xref>
                    </sup> For example, while clustering methods may struggle with high-dimensional data, autoencoders can effectively reduce dimensionality and capture intricate patterns that signify fraudulent behavior.
                    <sup>
                        <xref ref-type="bibr" rid="ref35">35</xref>,
                        <xref ref-type="bibr" rid="ref39">39</xref>,
                        <xref ref-type="bibr" rid="ref42">42</xref>
                    </sup> Furthermore, ensemble methods that combine autoencoders with other algorithms, such as Random Forests or Gradient Boosting, have shown promising results in improving detection accuracy and robustness against class imbalance.
                    <sup>
                        <xref ref-type="bibr" rid="ref40">40</xref>,
                        <xref ref-type="bibr" rid="ref41">41</xref>
                    </sup>
                </p>
                <p>In summary, autoencoders are effective for feature extraction in anomaly detection, particularly fraud. Their architectures, such as VAEs and LSTM autoencoders, are suitable for various data types. However, issues, such as threshold determination and class imbalance, require further investigation. In this study, we combined autoencoders with other models to enhance the results and address these challenges.</p>
            </sec>
            <sec id="sec9">
                <title>Isolation forest</title>
                <p>The Isolation Forest algorithm is a powerful tool for anomaly detection, particularly in financial datasets. It operates based on the principle of isolating anomalies, instead of profiling normal data points. This is achieved by constructing a random forest of isolation trees, where each tree is built by randomly selecting a feature and then randomly selecting a split value between the maximum and minimum values of that feature. Anomalies are identified as instances that require fewer splits to be isolated because they are often located far from the majority of the data points in the feature space.
                    <sup>
                        <xref ref-type="bibr" rid="ref43">43</xref>,
                        <xref ref-type="bibr" rid="ref44">44</xref>
                    </sup> This characteristic makes isolation forests particularly effective in high-dimensional datasets, where traditional methods may struggle owing to the curse of dimensionality. Studies have shown that isolation forests maintain robust performance in high-dimensional settings, effectively identifying outliers, even when dimensionality increases significantly.
                    <sup>
                        <xref ref-type="bibr" rid="ref43">43</xref>,
                        <xref ref-type="bibr" rid="ref45">45</xref>
                    </sup>
                </p>
                <p>Parameter tuning is crucial for optimizing the performance of an isolation-forest algorithm. Common techniques include adjusting the number of trees in the forest and subsampling size, which can influence the sensitivity of the model to anomalies. For instance, increasing the number of trees generally improves the robustness of the model, while the subsampling size can be tuned to balance between computational efficiency and detection accuracy.
                    <sup>
                        <xref ref-type="bibr" rid="ref45">45</xref>,
                        <xref ref-type="bibr" rid="ref46">46</xref>
                    </sup> In terms of computational advantages, the Isolation Forest algorithm is highly efficient and requires linear time complexity relative to the number of data points, making it scalable for large datasets.
                    <sup>
                        <xref ref-type="bibr" rid="ref44">44</xref>,
                        <xref ref-type="bibr" rid="ref47">47</xref>
                    </sup>
                </p>
                <p>Isolation forests can also be integrated into hybrid models to enhance their anomaly detection capabilities. For example, they can be combined with supervised learning techniques to refine the detection process by leveraging labelled data for training. This integration allows for improved feature selection and anomaly characterization, leading to better overall performance in detecting complex patterns in financial datasets.
                    <sup>
                        <xref ref-type="bibr" rid="ref48">48</xref>,
                        <xref ref-type="bibr" rid="ref49">49</xref>
                    </sup> Such hybrid approaches can utilize the strengths of multiple algorithms, thereby improving the robustness and accuracy of anomaly detection frameworks in various applications, including fraud detection in banking and finance.
                    <sup>
                        <xref ref-type="bibr" rid="ref49">49</xref>
                    </sup>
                </p>
                <p>In summary, the Isolation Forest algorithm is a robust method for detecting anomalies in financial datasets, and is particularly effective in high-dimensional spaces. Parameter tuning plays a critical role in optimizing its performance, while its computational efficiency makes it suitable for large datasets. Moreover, the integration of isolation forests with other methods in hybrid models can significantly enhance their anomaly detection capabilities.</p>
            </sec>
        </sec>
        <sec id="sec10">
            <title>Supervised methods</title>
            <sec id="sec11">
                <title>XGBoost</title>
                <p>Extreme gradient boosting (XGBoost) has emerged as a powerful tool for fraud detection, particularly in the context of imbalanced datasets. The algorithm&#x2019;s inherent ability to handle imbalanced data stems from its gradient boosting framework, which optimizes the model by focusing on misclassified instances, thereby enhancing its sensitivity to minority classes, such as fraudulent transactions. This characteristic is crucial in fraud detection, where legitimate cases significantly outnumber fraudulent ones.
                    <sup>
                        <xref ref-type="bibr" rid="ref50">50</xref>,
                        <xref ref-type="bibr" rid="ref51">51</xref>
                    </sup> Furthermore, XGBoost incorporates regularization techniques that help mitigate overfitting, which is a common challenge in machine learning models trained on imbalanced datasets.
                    <sup>
                        <xref ref-type="bibr" rid="ref50">50</xref>,
                        <xref ref-type="bibr" rid="ref51">51</xref>
                    </sup>
                </p>
                <p>Hyperparameter tuning is essential for optimizing the performance of XGBoost in fraud detection tasks. Techniques such as grid search, random search, and more advanced methods such as Bayesian optimization have been employed to identify the most effective hyperparameters. For instance, the use of Bayesian optimization has been shown to enhance the model&#x2019;s ability to balance training weights for asymmetric examples, which is particularly beneficial in fraud-detection scenarios.
                    <sup>
                        <xref ref-type="bibr" rid="ref52">52</xref>,
                        <xref ref-type="bibr" rid="ref53">53</xref>
                    </sup>
                </p>
                <p>When comparing XGBoost with other supervised learning methods, it consistently demonstrates superior performance in fraud-detection tasks. Studies have shown that XGBoost outperforms traditional models such as logistic regression and decision trees as well as other ensemble methods such as random forests. This superiority is attributed to its ability to capture complex non-linear relationships and interactions between features, which are often present in fraud detection datasets.
                    <sup>
                        <xref ref-type="bibr" rid="ref54">54</xref>,
                        <xref ref-type="bibr" rid="ref55">55</xref>
                    </sup> Moreover, XGBoost&#x2019;s feature importance capabilities allow practitioners to gain insights into the most influential predictors of fraud, further enhancing model interpretability and decision-making processes.
                    <sup>
                        <xref ref-type="bibr" rid="ref19">19</xref>,
                        <xref ref-type="bibr" rid="ref56">56</xref>
                    </sup>
                </p>
                <p>Researchers have also explored the integration of XGBoost with hybrid anomaly-detection models. For instance, combining XGBoost with unsupervised learning techniques allows for the extraction of patterns from data that can be used as new features, thereby improving the robustness of the model against noise and outliers.
                    <sup>
                        <xref ref-type="bibr" rid="ref57">57</xref>
                    </sup>
                </p>
                <p>In conclusion, XGBoost&#x2019;s optimization for fraud detection in imbalanced datasets is facilitated by its robust handling of misclassifications, effective hyperparameter tuning techniques, and superior performance compared to other supervised learning methods. The role of feature importance is critical in refining model performance, while hybrid approaches continue to expand the capabilities of XGBoost in anomaly detection scenarios.</p>
            </sec>
            <sec id="sec12">
                <title>Random forest</title>
                <p>Random Forest (RF) is a versatile ensemble technique that has been broadly applied in anomaly detection for both supervised and semi-supervised learning tasks. In fully supervised settings, RF algorithms are trained with labelled examples covering both normal and anomalous classes, thereby enabling the model to learn complex non-linear decision boundaries that can reliably separate rare and abnormal events.
                    <sup>
                        <xref ref-type="bibr" rid="ref58">58</xref>
                    </sup> In contrast, semi-supervised applications typically exploit RF&#x2019;s ability to capture underlying data distributions by training exclusively on normal (or &#x201c;positive&#x201d;) samples and subsequently flagging deviations as anomalies.
                    <sup>
                        <xref ref-type="bibr" rid="ref59">59</xref>
                    </sup>
                </p>
                <p>The performance of RF is particularly noteworthy in high-dimensional and large-scale datasets such as those encountered in credit card fraud detection. RF can naturally handle large numbers of features owing to its random feature subspace selection at each split, which mitigates overfitting and improves generalization.
                    <sup>
                        <xref ref-type="bibr" rid="ref60">60</xref>
                    </sup> Empirical studies have demonstrated that RF-based methods perform competitively in scenarios characterized by rare events, such as fraud detection, by effectively identifying subtle patterns that differentiate fraudulent from legitimate behaviors.
                    <sup>
                        <xref ref-type="bibr" rid="ref60">60</xref>
                    </sup> Nevertheless, the class imbalance inherent in such applications often calls for hybrid or improved approaches, for example, through combination with feature selection procedures or integration with unsupervised algorithms, to further boost detection accuracy.</p>
            </sec>
        </sec>
        <sec id="sec13">
            <title>Hybrid integration</title>
            <p>Hybrid models, which combine unsupervised and supervised learning techniques, have gained traction in various fields owing to their ability to leverage the strengths of both approaches. The integration of unsupervised outputs with supervised methods can enhance the predictive performance, particularly in scenarios where labelled data are scarce. This synthesis typically involves several strategies, including feature extraction, ensemble methods, and model stacking, which can significantly improve the overall performance of the hybrid models.</p>
            <p>One effective integration strategy is the use of unsupervised learning for feature extraction, which can reduce dimensionality and capture underlying patterns in the data. For instance, autoencoders or clustering algorithms can preprocess data before they are fed into a supervised learning model, thereby enhancing their predictive capabilities.
                <sup>
                    <xref ref-type="bibr" rid="ref61">61</xref>,
                    <xref ref-type="bibr" rid="ref62">62</xref>
                </sup> In addition, ensemble methods that combine predictions from unsupervised and supervised models can lead to more robust outcomes. For example, a hybrid model that integrates predictions from a clustering algorithm with those from a regression model can yield a better accuracy than either model alone.
                <sup>
                    <xref ref-type="bibr" rid="ref63">63</xref>
                </sup>
            </p>
            <p>Handling conflicting outputs from unsupervised and supervised models is a critical challenge in hybrid modelling. Researchers often employ conflict resolution strategies such as voting mechanisms, where the final decision is based on the majority output, or weighted averaging, where outputs are combined based on their reliability or performance metrics.
                <sup>
                    <xref ref-type="bibr" rid="ref64">64</xref>,
                    <xref ref-type="bibr" rid="ref65">65</xref>
                </sup> This approach allows for more nuanced integration of the models, ensuring that the final output reflects the strengths of both methodologies. In this study, we utilized a weighting method to combine the outputs of supervised and unsupervised algorithms.</p>
            <p>In summary, hybrid models that integrate unsupervised and supervised methods offer significant advantages in terms of predictive performance and robustness. By employing effective integration strategies, resolving conflicts between outputs, and utilizing appropriate benchmarks for evaluation, researchers can harness the strengths of both methodologies to address complex challenges across various domains.</p>
        </sec>
        <sec id="sec14">
            <title>Evaluation metrics</title>
            <p>In fraud detection studies, various evaluation metrics are employed to assess the performance of the models. Commonly used metrics include accuracy, precision, recall, F1-score, and area under the receiver operating characteristic curve (AUC-ROC). Each metric provides unique insights into the effectiveness of the model in identifying fraudulent activity.</p>
            <p>Precision, recall, and F1-score are particularly significant in the context of anomaly detection. Precision measures the proportion of true positive predictions among all positive predictions, indicating how many of the flagged instances were actually fraudulent. Recall, on the other hand, assesses the proportion of true positives among all actual positives, reflecting the model&#x2019;s ability to identify all relevant instances. The F1-score is the harmonic mean of precision and recall, providing a single metric that balances both concerns. In fraud detection, where false positives can lead to unnecessary investigations and false negatives can result in undetected fraud, these metrics are crucial for evaluating model performance.
                <sup>
                    <xref ref-type="bibr" rid="ref66">66</xref>,
                    <xref ref-type="bibr" rid="ref67">67</xref>
                </sup>
                <disp-formula id="e1">

                    <mml:math display="block">
                        <mml:mtext>Precision</mml:mtext>
                        <mml:mo>=</mml:mo>
                        <mml:mfrac>
                            <mml:mi mathvariant="italic">TP</mml:mi>
                            <mml:mrow>
                                <mml:mi mathvariant="italic">TP</mml:mi>
                                <mml:mo>+</mml:mo>
                                <mml:mi mathvariant="italic">FP</mml:mi>
                            </mml:mrow>
                        </mml:mfrac>
                    </mml:math>
</disp-formula>
            </p>
            <p>Precision means: Of all predicted positive cases, how many were actually positive.
                <disp-formula id="e2">

                    <mml:math display="block">
                        <mml:mtext>Recall</mml:mtext>
                        <mml:mo>=</mml:mo>
                        <mml:mfrac>
                            <mml:mi mathvariant="italic">TP</mml:mi>
                            <mml:mrow>
                                <mml:mi mathvariant="italic">TP</mml:mi>
                                <mml:mo>+</mml:mo>
                                <mml:mi mathvariant="italic">FN</mml:mi>
                            </mml:mrow>
                        </mml:mfrac>
                    </mml:math>
</disp-formula>
            </p>
            <p>Recall means: Of all actual positive cases, how many were correctly predicted.
                <disp-formula id="e3">

                    <mml:math display="block">
                        <mml:mtext>F1-Score</mml:mtext>
                        <mml:mo>=</mml:mo>
                        <mml:mn>2</mml:mn>
                        <mml:mo>&#x00d7;</mml:mo>
                        <mml:mfrac>
                            <mml:mrow>
                                <mml:mtext>Precision</mml:mtext>
                                <mml:mo>&#x00d7;</mml:mo>
                                <mml:mtext>Recall</mml:mtext>
                            </mml:mrow>
                            <mml:mrow>
                                <mml:mtext>Precision</mml:mtext>
                                <mml:mo>+</mml:mo>
                                <mml:mtext>Recall</mml:mtext>
                            </mml:mrow>
                        </mml:mfrac>
                    </mml:math>
</disp-formula>
            </p>
            <p>F1-Score: The harmonic mean of Precision and Recall &#x2014; a balance between the two.</p>
            <p>Where:</p>
            <p>TP = True Positives</p>
            <p>TN = True Negatives</p>
            <p>FP = False Positives</p>
            <p>FN = False Negatives</p>
            <p>The trade-off between accuracy and computational efficiency is critical for fraud detection. While accuracy provides a straightforward measure of overall correctness, it can be misleading in imbalanced datasets, which are common in fraud-detection scenarios where fraudulent cases are rare compared with legitimate ones. Computational efficiency, on the other hand, refers to the time and resources required to train and deploy models. Models that achieve high accuracy may require extensive computational resources, making them less practical for real-time fraud detection applications. Therefore, a balance must be struck between achieving high accuracy and maintaining computational efficiency to ensure that the models can operate effectively in real-world environments.
                <sup>
                    <xref ref-type="bibr" rid="ref66">66</xref>,
                    <xref ref-type="bibr" rid="ref67">67</xref>
                </sup>
            </p>
            <p>AUC-ROC curves are instrumental in assessing model performance, particularly in binary classification tasks such as fraud detection. The ROC (Receiver Operating Characteristic) curve plots the true positive rate against the false positive rate at various threshold settings, allowing for visualization of the trade-off between sensitivity and specificity. The AUC (Area Under the Curve) quantifies the overall ability of the model to discriminate between the positive and negative classes, with values closer to 1 indicating better performance. AUC-ROC is particularly useful in fraud detection because it provides a comprehensive view of the model&#x2019;s performance across different decision thresholds, aiding in the selection of an optimal threshold for deployment.
                <sup>
                    <xref ref-type="bibr" rid="ref68">68</xref>&#x2013;
                    <xref ref-type="bibr" rid="ref70">70</xref>
                </sup>
            </p>
            <p>Several datasets and competitions serve as benchmarks for comparing model results in fraud detection. For instance, Kaggle competitions often provide standardized datasets for benchmarking machine-learning models. Additionally, the UCI Machine Learning Repository includes various datasets relevant to fraud detection, allowing researchers to compare their models with established baselines. These benchmarks facilitate the evaluation of new methods against existing approaches and promote advancements in the field.
                <sup>
                    <xref ref-type="bibr" rid="ref66">66</xref>,
                    <xref ref-type="bibr" rid="ref67">67</xref>
                </sup>
            </p>
            <p>In summary, the evaluation metrics commonly used in fraud-detection studies include precision, recall, F1-score, and AUC-ROC. Each metric offers valuable insights into the model performance, particularly in the context of imbalanced datasets. The trade-off between accuracy and computational efficiency highlights the need for practical solutions for real-time applications. AUC-ROC curves serve as vital tools for assessing model discrimination capabilities, whereas established benchmarks provide a framework for comparative analysis in the field. The researcher used precision, recall, and F1-score to evaluate the performance of the hybrid model. Additionally, AUC-ROC and MCC (Matthews Correlation Coefficient) values were calculated to obtain insights into the model results.</p>
        </sec>
        <sec id="sec15" sec-type="methods">
            <title>Methods</title>
            <sec id="sec16">
                <title>Dataset description</title>
                <p>The creditcard.csv dataset, which is widely utilized in fraud-detection research, is characterized by its focus on credit card transactions, specifically anonymized records from European cardholders. This dataset typically contains features such as transaction time, transaction amount, and various anonymized features derived from PCA (Principal Component Analysis) to protect user privacy. A notable aspect of this dataset is its significant class imbalance, where fraudulent transactions are vastly outnumbered by legitimate transactions, presenting a challenge for machine learning models.
                    <sup>
                        <xref ref-type="bibr" rid="ref30">30</xref>,
                        <xref ref-type="bibr" rid="ref71">71</xref>,
                        <xref ref-type="bibr" rid="ref72">72</xref>
                    </sup> The dataset consists of 284,807 transactions, with only 492 labelled as fraudulent, highlighting the difficulty of detecting fraud owing to the rarity of positive instances.
                    <sup>
                        <xref ref-type="bibr" rid="ref9">9</xref>,
                        <xref ref-type="bibr" rid="ref73">73</xref>
                    </sup>
                </p>
                <p>The quality of datasets significantly affects the performance of hybrid models in fraud detection. High-quality datasets enable more accurate feature extraction and model training, leading to improved detection rates and reduced false positives.
                    <sup>
                        <xref ref-type="bibr" rid="ref20">20</xref>,
                        <xref ref-type="bibr" rid="ref74">74</xref>
                    </sup> Conversely, poor-quality datasets can result in overfitting, where models perform well on training data but fail to generalize to unseen data, ultimately undermining their effectiveness in real-world applications.
                    <sup>
                        <xref ref-type="bibr" rid="ref9">9</xref>,
                        <xref ref-type="bibr" rid="ref75">75</xref>
                    </sup> Therefore, ensuring high-quality data is essential for developing reliable and efficient fraud-detection systems.</p>
                <p>This study employs the publicly available 
                    <bold>creditcard.csv</bold> dataset from Kaggle, which contains anonymized credit card transaction data from European cardholders. The dataset consists of 

                    <bold>284,807 transactions</bold>, of which 
                    <bold>492 are labelled as fraudulent</bold>, representing approximately 
                    <bold>0.17%</bold> of the total data. The features include 
                    <bold>28 principal components</bold> (V1&#x2013;V28) derived through 
                    <bold>Principal Component Analysis (PCA)</bold> to preserve privacy, along with the 
                    <monospace>Amount</monospace>, 
                    <monospace>Time</monospace>, and 
                    <monospace>Class</monospace> attributes. The 
                    <monospace>Class</monospace> variable serves as the binary target label, where 
                    <monospace>1</monospace> indicates fraud, and 
                    <monospace>0</monospace> represents a legitimate transaction.</p>
            </sec>
            <sec id="sec17">
                <title>Data preprocessing and class imbalance</title>
                <p>The preprocessing challenges in real-world financial datasets are prevalent and multifaceted. Common issues include handling missing values, addressing class imbalances, and ensuring data privacy and security.
                    <sup>
                        <xref ref-type="bibr" rid="ref2">2</xref>,
                        <xref ref-type="bibr" rid="ref6">6</xref>,
                        <xref ref-type="bibr" rid="ref76">76</xref>
                    </sup>
                </p>
                <p>As a 
                    <bold>preprocessing</bold> step, the 
                    <bold>MinMaxScaler</bold> technique was used. MinMaxScaler is a widely used data pre-processing technique that transforms numerical features by rescaling them to a specified range, typically between 0 and 1. This scaling method preserves the relationships between the original data values while ensuring that all features contribute proportionately to model training. It is particularly effective for distance-based algorithms and neural networks, which are sensitive to differences in feature magnitude. This helps standardize features such as transaction amounts or time-related attributes, enabling models such as autoencoders to converge more quickly and effectively.</p>
                <p>Additionally, researchers have employed 
                    <bold>Principal Component Analysis</bold> (
                    <bold>PCA</bold>) as a preprocessing tool for 
                    <bold>dimensionality reduction</bold>. PCA is a widely used technique for dimensionality reduction during anomaly detection. By transforming high-dimensional data into a lower-dimensional space, PCA helps identify patterns and anomalies more efficiently. This is achieved by projecting the data onto the directions of maximum variance, effectively filtering out noise and irrelevant features, which can obscure the detection of anomalies.
                    <sup>
                        <xref ref-type="bibr" rid="ref77">77</xref>,
                        <xref ref-type="bibr" rid="ref78">78</xref>
                    </sup>
                </p>
                <p>Furthermore, 
                    <bold>class imbalance</bold>, where legitimate transactions far outnumber fraudulent transactions, complicates the training of machine-learning models, often leading to biased predictions that favor the majority class.
                    <sup>
                        <xref ref-type="bibr" rid="ref72">72</xref>,
                        <xref ref-type="bibr" rid="ref79">79</xref>
                    </sup> The researchers employed the 
                    <bold>BorderlineSMOTE</bold> method to address class imbalance in the creditcard.csv dataset. However, this method was applied exclusively during the training of the supervised methods because it adversely affects the unsupervised algorithms.</p>
            </sec>
        </sec>
        <sec id="sec18" sec-type="results|discussion">
            <title>Results and Discussion</title>
            <p>In this study, XGBoost and Random Forest were employed as supervised learning algorithms, whereas the Autoencoder and Isolation Forest were utilized as unsupervised methods to detect anomalies. The data preprocessing pipeline included MinMax normalization to standardize the feature scales and the removal of statistical outliers to reduce noise and improve model stability. To address the high dimensionality of the dataset, Principal Component Analysis (PCA) was applied as a dimensionality reduction technique, preserving the most significant variance components.</p>
            <p>In addition, BorderlineSMOTE was incorporated into the training process of the supervised models to address class imbalance and improve minority class learning. This technique was particularly beneficial in enhancing the sensitivity of classifiers to fraudulent transactions while also reducing the risk of overfitting to rare fraud instances. Moreover, BorderlineSMOTE contributes to increased robustness against boundary-region vulnerabilities and potential data-poisoning attacks, thereby strengthening the overall generalization capability of supervised components.</p>
            <p>As an initial step, we analyzed the performance of each method. The table below (
                <xref ref-type="table" rid="T1">
Table 1</xref>) presents the results of the precision, recall, F1-score and accuracy for each method for both normal cases (0) and fraud cases (1).</p>
            <table-wrap id="T1" orientation="portrait" position="float">
                <label>
Table 1. </label>
                <caption>
                    <title>Performance results for XGBoost, RandomForest, Autoencoder, and IsolationForest.</title>
                </caption>
                <table content-type="article-table" frame="hsides">
                    <thead>
                        <tr>
                            <th align="left" colspan="1" rowspan="1" valign="top">Method</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Precision(0)</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Precision(1)</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Recall(0)</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Recall(1)</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">F1-score(0)</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">F1-score(1)</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">
Accuracy</th>
                        </tr>
                    </thead>
                    <tbody>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">XGBoost</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>0.9999</bold>
</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.9407</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>0.9999</bold>
</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.9250</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>0.9999</bold>
</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>0.9328</bold>
</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>0.9998</bold>
</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">RandomForest</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.9998</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>0.9459</bold>
</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>0.9999</bold>
</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.8750</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>0.9999</bold>
</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.9091</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.9997</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Autoencoder</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.9993</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.5847</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.9994</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.5750</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.9993</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.5798</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.9987</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">IsolationForest</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>0.9999</bold>
</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.0192</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.9230</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>0.9500</bold>
</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.9599</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.0376</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.9230</td>
                        </tr>
                    </tbody>
                </table>
            </table-wrap>
            <p>Among the evaluated methods, XGBoost exhibited the best overall performance. It achieves near-perfect results for the majority class (Class 0) with precision (0) = 0.9999 and recall (0) = 0.9999 and maintains a high level of performance in the minority class (Class 1, i.e., fraud cases), with precision (1) = 0.9407, recall (1) = 0.9250, and F1-score (1) = 0.9328. This balance between precision and recall is crucial for fraud detection, indicating that XGBoost not only detects most fraudulent transactions but also minimizes false alarms. The overall accuracy of 0.9998 further confirms its robustness, although in imbalanced datasets, the accuracy alone is not a sufficient indicator. In conclusion, XGBoost is a top-performing supervised method that effectively manages both false positives and false negatives.</p>
            <p>Moreover, Random Forest also demonstrates strong performance in the majority class, similar to XGBoost, with Precision(0) = 0.9998 and Recall(0) = 0.9999. However, it performed slightly lower on the minority class, with recall (1) = 0.8750 and F1-score (1) = 0.9091. This suggests that while Random Forest is highly effective, it may miss a small number of fraud cases compared to XGBoost. Nevertheless, its accuracy of 0.9997 confirms its high reliability. In conclusion, Random Forest is an effective and reliable ensemble method, but slightly less optimal than XGBoost for fraud detection.</p>
            <p>In contrast, the Autoencoder, an unsupervised learning method trained on normal data (Class 0), performs exceptionally well on the majority class, with precision (0) = 0.9993 and recall (0) = 0.9994. However, its fraud detection performance was significantly lower, with precision (1) = 0.5847, recall (1) = 0.5750, and F1-score (1) = 0.5798. Although it still detects some anomalies, the model generates a large number of false positives and fails to detect many frauds. In conclusion, the autoencoder is moderately effective as a baseline anomaly detector but lacks precision and recall for minority class identification in isolation.</p>
            <p>The Isolation Forest produces poor results for fraud detection, with precision (1) = 0.0192 and F1-score (1) = 0.0376, despite a relatively high recall (1) = 0.9500. This suggests that while it flags nearly all frauds (high recall), it generates an extremely high number of false positives (very low precision), making it impractical for real-world fraud detection, where every alert carries a cost. The overall accuracy of 0.9230 was misleadingly high, inflated by the overwhelming presence of normal transactions. In conclusion, the Isolation Forest method is overly sensitive and lacks practical usefulness for fraud detection in imbalanced datasets.</p>
            <p>In high-stakes domains, such as credit card fraud detection, the cost of false positives (customer complaints) and false negatives (missed fraud) must be minimized. Among the models tested, XGBoost provided the best trade-off between fraud detection and noise minimization. Hybrid approaches that combine the sensitivity of unsupervised methods (such as autoencoders) with the precision of supervised learners (such as XGBoost or RF) may offer better results when properly tuned.</p>
            <p>Hence, in this study, we tested a hybrid model that combines these four methods (XGBoost, RandomForest, Autoencoder, and IsolationForest) through a weighting scheme, which assigns different importance levels (weights) to the outputs of the individual models (e.g., Autoencoder, XGBoost, Isolation Forest, etc.) when combining their anomaly scores into a single decision score.</p>
            <p>The table below (
                <xref ref-type="table" rid="T2">
Table 2</xref>) presents the final performance results after combining the methods and applying the weights. We named the model XRAI, which is formed from the first letter of each method (
                <bold>X</bold>GBoost, 
                <bold>R</bold>andomForest, 
                <bold>A</bold>utoencoder, and 
                <bold>I</bold>solationForest).</p>
            <table-wrap id="T2" orientation="portrait" position="float">
                <label>
Table 2. </label>
                <caption>
                    <title>Performance comparison between XRAI and other models.</title>
                </caption>
                <table content-type="article-table" frame="hsides">
                    <thead>
                        <tr>
                            <th align="left" colspan="1" rowspan="1" valign="top">Method</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Precision(0)</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Precision(1)</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Recall(0)</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Recall(1)</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">F1-score(0)</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">F1-score(1)</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">
Accuracy</th>
                        </tr>
                    </thead>
                    <tbody>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="middle">XRAI</td>
                            <td align="left" colspan="1" rowspan="1" valign="middle">
                                <bold>0.9999</bold>
</td>
                            <td align="left" colspan="1" rowspan="1" valign="middle">
                                <bold>0.9569</bold>
</td>
                            <td align="left" colspan="1" rowspan="1" valign="middle">
                                <bold>0.9999</bold>
</td>
                            <td align="left" colspan="1" rowspan="1" valign="middle">0.9250</td>
                            <td align="left" colspan="1" rowspan="1" valign="middle">
                                <bold>0.9999</bold>
</td>
                            <td align="left" colspan="1" rowspan="1" valign="middle">
                                <bold>0.9407</bold>
</td>
                            <td align="left" colspan="1" rowspan="1" valign="middle">
                                <bold>0.9998</bold>
</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">XGBoost</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.9999</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.9407</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.9999</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.9250</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.9999</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.9328</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.9998</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">RandomForest</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.9998</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.9459</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.9999</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.8750</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.9999</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.9091</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.9997</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Autoencoder</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.9993</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.5847</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.9994</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.5750</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.9993</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.5798</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.9987</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">IsolationForest</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.9999</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.0192</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.9230</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>0.9500</bold>
</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.9599</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.0376</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.9230</td>
                        </tr>
                    </tbody>
                </table>
                <table-wrap-foot>
                    <p>

                        <bold>XRAI</bold> is the newly proposed model; the name comes from the first letter of each selected method (XGBoost, RandomForest, Autoencoder, and IsolationForest).</p>
                </table-wrap-foot>
            </table-wrap>
            <p>The hybrid XRAI model, which integrates the strengths of XGBoost, Random Forest, Autoencoder, and Isolation Forest using a weighted score, demonstrates outstanding anomaly detection capability. It effectively combines supervised and unsupervised methods to balance precision, recall, and generalization, which are crucial in high-stakes fraud detection scenarios.</p>
            <sec id="sec19">
                <title>Performance on the majority class (Normal - Class 0)</title>
                <p>

                    <list list-type="bullet">
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Precision (0) = 0.9999 and recall (0) = 0.9999 indicate near-perfect classification of legitimate transactions.</p>
                            <p>This means that the model is extremely reliable for minimizing false positives, which is critical for avoiding the disruption of normal customer activity.</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>The F1-score (0) = 0.9999 confirms that there is no trade-off between precision and recall for normal transactions.</p>
                        </list-item>
                    </list>
                </p>
            </sec>
            <sec id="sec20">
                <title>Performance on the minority class (Fraud - Class 1)</title>
                <p>

                    <list list-type="bullet">
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Precision (1) = 0.9569 indicates that when the model flags a transaction as fraudulent, it is correct approximately 96% of the time, which is vital to avoid wasting resources on false alarms.</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Recall (1) = 0.9250 shows that the model can capture over 92% of all fraudulent transactions, which is an impressive detection rate given the class imbalance and subtlety of the fraud patterns.</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>The F1-score (1) = 0.9407 demonstrates a strong harmonic balance between precision and recall, making the model highly effective for real-world deployment.</p>
                        </list-item>
                    </list>
                </p>
                <p>
                    <xref ref-type="fig" rid="f1">
Figure 1</xref> illustrates the Receiver Operating Characteristic (ROC) curve for the proposed hybrid anomaly detection model, XRAI (XGBoost, Random Forest, Autoencoder, Isolation Forest). The ROC curve plots the True Positive Rate (recall) against the False Positive Rate (1 - Specificity) across a range of classification thresholds.</p>
                <fig fig-type="figure" id="f1" orientation="portrait" position="float">
                    <label>
Figure 1. </label>
                    <caption>
                        <title>Receiver Operating Characteristic (ROC) curve for the proposed model XRAI.</title>
                        <p>(XRAI: First letters of XGBoost, Random Forest, Autoencoder, Isolation Forest).</p>
                    </caption>
                    <graphic id="gr1" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/183325/666adf66-e222-48dd-b7c6-763c08f08dc3_figure1.gif"/>
                </fig>
                <p>The curve shows a steep rise toward the upper-left corner of the plot, which is indicative of a high-performing classifier. The area under the ROC curve (AUC) is 0.9885, suggesting that the model had excellent discriminative capability. An AUC value closer to 1 indicates that the classifier is highly capable of distinguishing between the positive class (fraudulent transactions) and negative class (legitimate transactions).</p>
                <p>In summary, the ROC curve and its corresponding AUC of 0.9885 provide strong empirical evidence of XRAI&#x2019;s ability to effectively separate fraud from non-fraud, even under class imbalance conditions, a critical requirement for robust fraud-detection systems in the financial domain.</p>
                <p>The proposed XRAI model, an ensemble combining XGBoost, Random Forest, Autoencoder, and Isolation Forest, achieved a Matthews Correlation Coefficient (MCC) of 0.9407, indicating a strong and balanced predictive performance, particularly in the context of imbalanced classification tasks such as credit card fraud detection.</p>
                <p>The XRAI model demonstrates a highly optimized hybrid ensemble for credit card fraud detection. It achieves excellent detection of rare fraudulent cases, while maintaining ultralow false-positive rates. The combination of supervised precision and unsupervised anomaly sensitivity is managed through a weighted mechanism that positions XRAI as a practically deployable solution in real-time financial anomaly detection systems.</p>
            </sec>
            <sec id="sec21">
                <title>Comparison to other similar studies</title>
                <p>To contextualize the performance of the proposed hybrid anomaly detection framework, a comparative analysis was conducted with recent studies on credit card fraud detection that utilized similar datasets and evaluation metrics. The objective of this comparison is to demonstrate the relative effectiveness of the proposed model in terms of precision, recall, F1-score, and MCC.</p>
                <p>Several studies have explored both the single-model and hybrid approaches using the Kaggle credit card fraud dataset. These models include supervised methods such as Logistic Regression, Random Forest, and XGBoost as well as unsupervised techniques such as Isolation Forest and Autoencoder-based anomaly detectors. In more recent works, hybrid models combining deep learning and ensemble techniques have been proposed to address the limitations of detection accuracy and generalizability.</p>
                <p>
                    <xref ref-type="table" rid="T3">
Table 3</xref> summarizes the selection of comparable studies, outlining the key models used and their reported results. The evaluation metrics used in each study were also included to enable a standardized comparison. Where applicable, the performance of the proposed hybrid model is highlighted to illustrate the improvements over the existing approaches.</p>
                <table-wrap id="T3" orientation="portrait" position="float">
                    <label>
Table 3. </label>
                    <caption>
                        <title>Comparative performance of proposed model vs. existing studies.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">Method</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Accuracy</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Precision</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Recall (TPR)</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">
F1-score</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">MCC</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">
TNR</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Our Proposed Method (XRAI)</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>0.9998</bold>
</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.9569</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>0.9250</bold>
</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>0.9407</bold>
</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>0.9407</bold>
</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>0.9999</bold>
</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Ding et al. (2024)
                                    <sup>
                                        <xref ref-type="bibr" rid="ref80">80</xref>
                                    </sup> - AE + LightGBM (AEELG)</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.921</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.8875</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.3451</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.4722</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.4739</td>
                                <td colspan="1" rowspan="1"/>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Du et al. (2024)
                                    <sup>
                                        <xref ref-type="bibr" rid="ref81">81</xref>
                                    </sup> - AE-XGB-SMOTE-CGAN</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.9993</td>
                                <td colspan="1" rowspan="1"/>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.7839</td>
                                <td colspan="1" rowspan="1"/>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.8845</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.9997</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Alshameri &amp; Xia (2024)
                                    <sup>
                                        <xref ref-type="bibr" rid="ref82">82</xref>
                                    </sup> &#x2013; VAE</td>
                                <td colspan="1" rowspan="1"/>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.93</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.92</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.92</td>
                                <td colspan="1" rowspan="1"/>
                                <td colspan="1" rowspan="1"/>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Wu &amp; Wang (2022)
                                    <sup>
                                        <xref ref-type="bibr" rid="ref83">83</xref>
                                    </sup> - Autoencoder + Adversarial Net</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.9061</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.9216</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.8878</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.9044</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.8128</td>
                                <td colspan="1" rowspan="1"/>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Lok et al. (2022)
                                    <sup>
                                        <xref ref-type="bibr" rid="ref23">23</xref>
                                    </sup> - Hybrid K-means-KNN</td>
                                <td colspan="1" rowspan="1"/>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.9579</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.7215</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.8231</td>
                                <td colspan="1" rowspan="1"/>
                                <td colspan="1" rowspan="1"/>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Ishak et al. (2022)
                                    <sup>
                                        <xref ref-type="bibr" rid="ref84">84</xref>
                                    </sup> - Enhanced Stacking Classifier System</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.9837</td>
                                <td colspan="1" rowspan="1"/>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.8841</td>
                                <td colspan="1" rowspan="1"/>
                                <td colspan="1" rowspan="1"/>
                                <td colspan="1" rowspan="1"/>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Benchaji et al. (2021)
                                    <sup>
                                        <xref ref-type="bibr" rid="ref85">85</xref>
                                    </sup> - Attention + LSTM</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.9672</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">
                                    <bold>0.9885</bold>
</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.9191</td>
                                <td colspan="1" rowspan="1"/>
                                <td colspan="1" rowspan="1"/>
                                <td colspan="1" rowspan="1"/>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
                <p>As shown in 
                    <xref ref-type="table" rid="T3">
Table 3</xref>, the proposed hybrid model achieved superior performance across multiple metrics, attaining the 
                    <bold>highest accuracy with value of 0.9998</bold>, 
                    <bold>precision of 0.9569 (in the top three)</bold> and 
                    <bold>recall of 0.9250 (top one)</bold>, resulting in an 
                    <bold>F1-score of 0.9407 (top one)</bold> and 
                    <bold>MCC of 0.9407 (top one)</bold>. These results reflect significant advancements over earlier models, particularly in balancing the trade-off between sensitivity and specificity.</p>
                <p>This comparison substantiates the effectiveness of the proposed framework and supports its relevance as a practical, high-performance solution for financial fraud detection.</p>
            </sec>
            <sec id="sec22">
                <title>Real-world applications of the model in financial fraud detection</title>
                <p>The findings of this study have significant implications for real-world financial fraud detection, particularly in environments where data are imbalanced, adversarial, and evolving. The proposed hybrid model, XRAI, demonstrated exceptional accuracy and robustness in detecting anomalies in the widely used creditcard.csv dataset. By leveraging the strengths of XGBoost, Random Forest, Autoencoder, and Isolation Forest through a weighted scoring mechanism, XRAI offers a holistic and practical approach for identifying fraudulent financial transactions in real-time.</p>
                <p>One of the most critical applications of this model is early detection of credit card fraud. Financial institutions are facing increasing threats from sophisticated fraud schemes that are often hidden within massive volumes of transactional data. Traditional models that rely solely on supervised learning struggle with previously unseen and rare types of fraud. By incorporating unsupervised models, such as autoencoders and isolation forests, XRAI can detect previously unclassified anomalies, enabling systems to capture zero-day fraud attacks that evade conventional classifiers.</p>
                <p>In addition to fraud detection, this hybrid approach can be adapted for anti-money laundering (AML) systems, insurance fraud detection, and transaction monitoring in e-commerce. Given the adaptability of the model to high-dimensional and noisy data, it can also be used in environments beyond banking, such as healthcare claim validation or cyber intrusion detection, where anomalous patterns are often rare and context dependent.</p>
                <p>The practical benefits of this hybrid system extend beyond academic experimentation. It offers a deployable, scalable, and intelligent solution for industries facing complex fraud challenges. As financial crime continues to grow in scale and complexity, systems such as XRAI provide a promising blueprint for building more secure, proactive, and trustworthy fraud detection frameworks.</p>
            </sec>
            <sec id="sec23">
                <title>Challenges in implementation and model limitations</title>
                <p>Although the XRAI hybrid model presents a strong case for fraud-detection performance, several limitations emerged during the development and evaluation that must be addressed to fully understand its practical applicability. These limitations can be grouped into three primary categories: data, models, and operational constraints.</p>
                <p>First, the study relies on the creditcard.csv dataset, which has certain constraints despite its popularity. It is highly imbalanced, anonymized, and preprocessed, and does not fully reflect the diversity and noise found in real-world financial data. Features such as merchant category, transaction geolocation, and time-series behavior were not present in this dataset. This limits the generalizability of the model to broader financial environments. Moreover, the dataset lacks adversarial fraud samples that mimic legitimate behavior, which is increasingly common in real financial systems.</p>
                <p>Second, the complexity of the hybrid architecture introduces challenges in terms of interpretability, maintenance, and scalability. Although the ensemble combines multiple strengths, it also has its weaknesses. For example, autoencoders require careful tuning and are sensitive to reconstruction thresholds, whereas Isolation Forests tend to produce high false-positive rates unless precisely calibrated. Managing the balance of weights across all models adds another layer of complexity, particularly when adapting a system to new datasets or changing fraud patterns.</p>
                <p>Another limitation is the requirement for labeled data for supervised components, such as XGBoost and Random Forest. Labeling fraud in real-world data is often delayed or incomplete, which can limit the speed of retraining and adaptation. In rapidly changing environments, supervised models become stale unless mechanisms are in place for online or incremental learning.</p>
                <p>In summary, although XRAI provides strong fraud-detection performance in experimental settings, its real-world deployment requires careful consideration of data diversity, model manageability, latency, and compliance. Addressing these limitations can further enhance its reliability and adoption.</p>
            </sec>
        </sec>
        <sec id="sec24">
            <title>Conclusion and future work</title>
            <p>This study introduced a novel hybrid model, 
                <bold>XRAI</bold>, designed to enhance the performance and robustness of anomaly detection in credit card fraud-detection systems. By strategically integrating supervised learning algorithms such as XGBoost and Random Forest with unsupervised techniques such as autoencoders and isolation forests, the model effectively overcomes the limitations of single-classifier approaches in highly unbalanced and adversarial environments.</p>
            <p>The XRAI model demonstrated strong predictive power across a range of performance metrics, achieving an accuracy of 99.98%, precision of 95.69%, recall of 92.50%, and F1-score of 94.07%. The Matthews Correlation Coefficient (MCC) of 0.9407 and AUC of 0.9885 further indicate a high discriminative ability and balanced performance between the fraud and non-fraud classes. These results highlight the model&#x2019;s potential for real-time deployment in financial institutions aimed at reducing operational risks and minimizing false alarms.</p>
            <p>Despite these achievements, the study also acknowledged key limitations, including reliance on a single publicly available dataset (creditcard.csv), the computational cost of the hybrid architecture, and interpretability challenges. These limitations pave the way for further research in this area.</p>
            <p>Building on the current findings, future research on the XRAI model can pursue several promising directions to enhance its applicability and robustness in real-world settings. A critical improvement involves incorporating 
                <bold>temporal and contextual features</bold>, as fraudulent behaviors often manifest as sequential patterns over time. Leveraging techniques such as LSTM-based Autoencoders or Transformer-based architectures can enhance the detection of complex and evolving fraud strategies. Moreover, integrating contextual data, such as customer profiles, merchant categories, and geographic transaction information, can further improve classification accuracy and reduce false positives.</p>
            <p>Future studies should focus on 
                <bold>adaptive ensemble strategies</bold>, 
                <bold>explainable AI techniques</bold>, and 
                <bold>robustness against adversarial attacks</bold>. Testing the model across diverse datasets and domains is essential to validate its generalizability and scalability.</p>
            <p>In conclusion, the XRAI model presents a scalable, intelligent, and highly accurate solution for credit-card fraud detection. With further refinements in temporal modelling, explainability, and robustness, hybrid models such as XRAI hold significant promise for building trustworthy and resilient fraud detection systems tailored to the ever-evolving landscape of financial crime.</p>
            <sec id="sec25">
                <title>Ethical considerations</title>
                <p>Not applicable. This study does not involve human or animal subjects.</p>
            </sec>
        </sec>
        <sec id="sec26">
            <title>Contributions</title>
            <p>The contributions of each author are described according to the CRediT (Contributor Roles Taxonomy) system:
                <list list-type="bullet">
                    <list-item>
                        <label>&#x2022;</label>
                        <p>Mohammad Shanaa: Conceptualization; Methodology; Data Curation; Formal Analysis; Software; Validation; Visualization; Writing &#x2013; Original Draft; Writing &#x2013; Review &amp; Editing; Project Administration.</p>
                        <p>Mohammad Shanaa led the design and execution of the research, conducted the data analysis and model development, and prepared the initial and revised versions of the manuscript.</p>
                    </list-item>
                    <list-item>
                        <label>&#x2022;</label>
                        <p>Sherief Abdallah: Supervision; Conceptualization; Writing &#x2013; Review &amp; Editing.</p>
                        <p>Sherief Abdallah supervised the research process, contributed to refining the methodology and framing the research direction, and provided critical revisions to the manuscript.</p>
                    </list-item>
                </list>
            </p>
        </sec>
    </body>
    <back>
        <sec id="sec29" sec-type="data-availability">
            <title>Data availability</title>
            <sec id="sec30">
                <title>Underlying data</title>
                <p>This project uses the creditcard.csv dataset, which is available on the Kaggle website. The dataset is distributed under the Database Contents License (DbCL) v1.0.</p>
                <p>Users can download the dataset using the following steps:
                    <list list-type="bullet">
                        <list-item>
                            <label>-</label>
                            <p>Visit: 
                                <ext-link ext-link-type="uri" xlink:href="https://www.kaggle.com/datasets/mlg-ulb/creditcardfraud">https://www.kaggle.com/datasets/mlg-ulb/creditcardfraud</ext-link>
                            </p>
                        </list-item>
                        <list-item>
                            <label>-</label>
                            <p>Log in to or register with the Kaggle website</p>
                        </list-item>
                        <list-item>
                            <label>-</label>
                            <p>Click the Download option, then select the Download dataset option</p>
                        </list-item>
                    </list>
                </p>
            </sec>
            <sec id="sec31">
                <title>Extended data</title>
                <p>The source code can be accessed from: 
                    <ext-link ext-link-type="uri" xlink:href="https://github.com/mohshanaa/XRAI.git">https://github.com/mohshanaa/XRAI.git</ext-link>
                </p>
                <p>Archived code at the time of publication
                    <sup>
                        <xref ref-type="bibr" rid="ref86">86</xref>
                    </sup>: 
                    <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.5281/zenodo.15626193">https://doi.org/10.5281/zenodo.15626193</ext-link>
                </p>
                <p>License: 
                    <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution 4.0 International</ext-link>
                </p>
            </sec>
        </sec>
        <ack>
            <title>Acknowledgements</title>
            <p>This manuscript utilized OpenAI&#x2019;s ChatGPT (GPT-4) for drafting, linguistic refinement, and grammatical editing. Additionally, scite.ai was employed to identify and evaluate pertinent academic sources. The final content is the result of the authors&#x2019; original work and critical analysis.</p>
        </ack>
        <ref-list>
            <title>References</title>
            <ref id="ref1">
                <label>1</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Setiawan</surname>
                            <given-names>R</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Tjahjono</surname>
                            <given-names>B</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Firmansyah</surname>
                            <given-names>G</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Fraud Detection in Credit Card Transactions Using HDBSCAN, UMAP and SMOTE Methods.</article-title>
                    <source>

                        <italic toggle="yes">International Journal of Science, Technology &amp; Management.</italic>
</source>
                    <year>2023</year>;<volume>4</volume>:<fpage>1333</fpage>&#x2013;<lpage>1339</lpage>.
                    <pub-id pub-id-type="doi">10.46729/ijstm.v4i5.929</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref2">
                <label>2</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Noviandy</surname>
                            <given-names>TR</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Idroes</surname>
                            <given-names>GM</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Maulana</surname>
                            <given-names>A</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Credit Card Fraud Detection for Contemporary Financial Management Using XGBoost-Driven Machine Learning and Data Augmentation Techniques.</article-title>
                    <source>

                        <italic toggle="yes">Indatu J Manag Account.</italic>
</source>
                    <year>2023</year>;<volume>1</volume>:<fpage>29</fpage>&#x2013;<lpage>35</lpage>.
                    <pub-id pub-id-type="doi">10.60084/ijma.v1i1.78</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref3">
                <label>3</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Shimu Khatun</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Rabiul Alam</surname>
                            <given-names>B</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Taslim</surname>
                            <given-names>M</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Handling Class Imbalance in Credit Card Fraud Using Various Sampling Techniques.</article-title>
                    <source>

                        <italic toggle="yes">Am J Multidis Res Innov.</italic>
</source>
                    <year>2022</year>;<volume>1</volume>:<fpage>160</fpage>&#x2013;<lpage>168</lpage>.
                    <pub-id pub-id-type="doi">10.54536/ajmri.v1i4.633</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref4">
                <label>4</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Naidoo</surname>
                            <given-names>K</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Marivate</surname>
                            <given-names>V</given-names>
                        </name>
</person-group>:
                    <chapter-title>Unsupervised Anomaly Detection of Healthcare Providers Using Generative Adversarial Networks.</chapter-title>
                    <person-group person-group-type="editor">

                        <name name-style="western">
                            <surname>Hattingh</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Matthee</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Smuts</surname>
                            <given-names>H</given-names>
                        </name>

                        <etal/>
</person-group>, editors.
                    <source>

                        <italic toggle="yes">Responsible Design, Implementation and Use of Information and Communication Technology.</italic>
</source>
                    <publisher-loc>Cham</publisher-loc>:
                    <publisher-name>Springer International Publishing</publisher-name>;<year>2020</year>; vol.<volume>12066</volume>: pp.<fpage>419</fpage>&#x2013;<lpage>430</lpage>.
                    <pub-id pub-id-type="doi">10.1007/978-3-030-44999-5_35</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref5">
                <label>5</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Peng</surname>
                            <given-names>H</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Wang</surname>
                            <given-names>J</given-names>
                        </name>
</person-group>:
                    <article-title>Unbalanced Data Processing and Machine Learning in Credit Card Fraud Detection.</article-title>
                    <year>2022</year>.
                    <pub-id pub-id-type="doi">10.21203/rs.3.rs-2004320/v1</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref6">
                <label>6</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Gowda</surname>
                            <given-names>VT</given-names>
                        </name>
</person-group>:
                    <source>

                        <italic toggle="yes">Credit Card Fraud Detection using Supervised and Unsupervised Learning.</italic>
</source>
                    <publisher-name>Computer Science &amp; Information Technology (CS &amp; IT), AIRCC Publishing Corporation</publisher-name>;<year>2021</year>;<fpage>93</fpage>&#x2013;<lpage>98</lpage>.
                    <pub-id pub-id-type="doi">10.5121/csit.2021.111107</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref7">
                <label>7</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Ganji</surname>
                            <given-names>VR</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Chaparala</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Sajja</surname>
                            <given-names>R</given-names>
                        </name>
</person-group>:
                    <article-title>Shuffled shepherd political optimization-based deep learning method for credit card fraud detection.</article-title>
                    <source>

                        <italic toggle="yes">Concurr. Comput.</italic>
</source>
                    <year>2023</year>;<volume>35</volume>:<fpage>e7666</fpage>.
                    <pub-id pub-id-type="doi">10.1002/cpe.7666</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref8">
                <label>8</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Jain</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Arora</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Mehra</surname>
                            <given-names>A</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Anomaly Detection Algorithms in Financial Data.</article-title>
                    <source>

                        <italic toggle="yes">IJEAT.</italic>
</source>
                    <year>2021</year>;<volume>10</volume>:<fpage>76</fpage>&#x2013;<lpage>78</lpage>.
                    <pub-id pub-id-type="doi">10.35940/ijeat.E2598.0610521</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref9">
                <label>9</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Aslam</surname>
                            <given-names>F</given-names>
                        </name>
</person-group>:
                    <article-title>Advancing Credit Card Fraud Detection: A Review of Machine Learning Algorithms and the Power of Light Gradient Boosting.</article-title>
                    <source>

                        <italic toggle="yes">AJCST.</italic>
</source>
                    <year>2024</year>.
                    <pub-id pub-id-type="doi">10.11648/ajcst.20240701.12</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref10">
                <label>10</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Pitsane</surname>
                            <given-names>MY</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Mogale</surname>
                            <given-names>H</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Rensburg</surname>
                            <given-names>JJV</given-names>
                        </name>
</person-group>:
                    <article-title>Improving Accuracy of Credit Card Fraud Detection Using Supervised Machine Learning Models and Dimension Reduction.</article-title>
                    <source>

                        <italic toggle="yes">ICONIC.</italic>
</source>
                    <year>2022</year>;<volume>2022</volume>:<fpage>290</fpage>&#x2013;<lpage>301</lpage>.
                    <pub-id pub-id-type="doi">10.59200/ICONIC.2022.032</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref11">
                <label>11</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Saad</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Nadher</surname>
                            <given-names>I</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Hameed</surname>
                            <given-names>SM</given-names>
                        </name>
</person-group>:
                    <article-title>Credit Card Fraud Detection Challenges and Solutions: A Review.</article-title>
                    <source>

                        <italic toggle="yes">Iraqi J. Sci.</italic>
</source>
                    <year>2024</year>;<fpage>2287</fpage>&#x2013;<lpage>2303</lpage>.
                    <pub-id pub-id-type="doi">10.24996/ijs.2024.65.4.42</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref12">
                <label>12</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Zhang</surname>
                            <given-names>Y-F</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Lu</surname>
                            <given-names>H-L</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Lin</surname>
                            <given-names>H-F</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>The Optimized Anomaly Detection Models Based on an Approach of Dealing with Imbalanced Dataset for Credit Card Fraud Detection.</article-title>
                    <source>

                        <italic toggle="yes">Mob. Inf. Syst.</italic>
</source>
                    <year>2022</year>;<volume>2022</volume>:<fpage>1</fpage>&#x2013;<lpage>10</lpage>.
                    <pub-id pub-id-type="doi">10.1155/2022/8027903</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref13">
                <label>13</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Zheng</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Yang</surname>
                            <given-names>L</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Xin</surname>
                            <given-names>D</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>The Credit Card Anti-fraud Detection Model in the Context of Dynamic Integration Selection Algorithm.</article-title>
                    <source>

                        <italic toggle="yes">FCIS.</italic>
</source>
                    <year>2024</year>;<volume>6</volume>:<fpage>119</fpage>&#x2013;<lpage>122</lpage>.
                    <pub-id pub-id-type="doi">10.54097/a5jafgdv</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref14">
                <label>14</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Maheshwari</surname>
                            <given-names>VC</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Osman</surname>
                            <given-names>NA</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Aziz</surname>
                            <given-names>N</given-names>
                        </name>
</person-group>:
                    <article-title>A Hybrid Approach Adopted for Credit Card Fraud Detection Based on Deep Neural Networks and Attention Mechanism.</article-title>
                    <source>

                        <italic toggle="yes">ARASET.</italic>
</source>
                    <year>2023</year>;<volume>32</volume>:<fpage>315</fpage>&#x2013;<lpage>331</lpage>.
                    <pub-id pub-id-type="doi">10.37934/araset.32.1.315331</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref15">
                <label>15</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Berhane</surname>
                            <given-names>T</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Melese</surname>
                            <given-names>T</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Walelign</surname>
                            <given-names>A</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>A Hybrid Convolutional Neural Network and Support Vector Machine-Based Credit Card Fraud Detection Model.</article-title>
                    <source>

                        <italic toggle="yes">Math. Probl. Eng.</italic>
</source>
                    <year>2023</year>;<volume>2023</volume>:<fpage>8134627</fpage>.
                    <pub-id pub-id-type="doi">10.1155/2023/8134627</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref16">
                <label>16</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Jiang</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Dong</surname>
                            <given-names>R</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Wang</surname>
                            <given-names>J</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Credit Card Fraud Detection Based on Unsupervised Attentional Anomaly Detection Network.</article-title>
                    <source>

                        <italic toggle="yes">Systems.</italic>
</source>
                    <year>2023</year>;<volume>11</volume>:<fpage>305</fpage>.
                    <pub-id pub-id-type="doi">10.3390/systems11060305</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref17">
                <label>17</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Alharbi</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Alshammari</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Okon</surname>
                            <given-names>OD</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>A Novel text2IMG Mechanism of Credit Card Fraud Detection: A Deep Learning Approach.</article-title>
                    <source>

                        <italic toggle="yes">Electronics.</italic>
</source>
                    <year>2022</year>;<volume>11</volume>:<fpage>756</fpage>.
                    <pub-id pub-id-type="doi">10.3390/electronics11050756</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref18">
                <label>18</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Hajek</surname>
                            <given-names>P</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Abedin</surname>
                            <given-names>MZ</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Sivarajah</surname>
                            <given-names>U</given-names>
                        </name>
</person-group>:
                    <article-title>Fraud Detection in Mobile Payment Systems using an XGBoost-based Framework.</article-title>
                    <source>

                        <italic toggle="yes">Inf. Syst. Front.</italic>
</source>
                    <year>2023</year>;<volume>25</volume>:<fpage>1985</fpage>&#x2013;<lpage>2003</lpage>.
                    <pub-id pub-id-type="pmid">36258679</pub-id>
                    <pub-id pub-id-type="doi">10.1007/s10796-022-10346-6</pub-id>
                    <pub-id pub-id-type="pmcid">PMC9560719</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref19">
                <label>19</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Liu</surname>
                            <given-names>C</given-names>
                        </name>
</person-group>:
                    <article-title>Enhancing Credit Card Fraud Detection on Imbalanced Datasets.</article-title>
                    <source>

                        <italic toggle="yes">HBEM.</italic>
</source>
                    <year>2023</year>;<volume>21</volume>:<fpage>765</fpage>&#x2013;<lpage>773</lpage>.
                    <pub-id pub-id-type="doi">10.54097/hbem.v21i.14759</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref20">
                <label>20</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Airlangga</surname>
                            <given-names>G</given-names>
                        </name>
</person-group>:
                    <article-title>Evaluating the Efficacy of Machine Learning Models in Credit Card Fraud Detection.</article-title>
                    <source>

                        <italic toggle="yes">CNAHPC.</italic>
</source>
                    <year>2024</year>;<volume>6</volume>:<fpage>829</fpage>&#x2013;<lpage>837</lpage>.
                    <pub-id pub-id-type="doi">10.47709/cnahpc.v6i2.3814</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref21">
                <label>21</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Murat</surname>
                            <given-names>RK</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Tursunmetova</surname>
                            <given-names>F</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Nadirov</surname>
                            <given-names>NK</given-names>
                        </name>
</person-group>:
                    <article-title>Multi-Classifiers System for Credit Card Fraud Detection.</article-title>
                    <source>

                        <italic toggle="yes">BTOUPhMath.</italic>
</source>
                    <year>2023</year>;<fpage>33</fpage>&#x2013;<lpage>47</lpage>.
                    <pub-id pub-id-type="doi">10.48081/NMPU3955</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref22">
                <label>22</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Sujitha</surname>
                            <given-names>P</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Vanitha</surname>
                            <given-names>R</given-names>
                        </name>
</person-group>:
                    <article-title>Enhanced Technique for Credit Card Extortion Detection Using Extreme Gradient Boosting Algorithm.</article-title>
                    <source>

                        <italic toggle="yes">MEJAST.</italic>
</source>
                    <year>2023</year>;<volume>6</volume>:<fpage>35</fpage>&#x2013;<lpage>45</lpage>.
                    <pub-id pub-id-type="doi">10.46431/MEJAST.2023.6205</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref23">
                <label>23</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Lok</surname>
                            <given-names>LK</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Abdul Hameed</surname>
                            <given-names>V</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ehsan Rana</surname>
                            <given-names>M</given-names>
                        </name>
</person-group>:
                    <article-title>Hybrid machine learning approach for anomaly detection.</article-title>
                    <source>

                        <italic toggle="yes">IJEECS.</italic>
</source>
                    <year>2022</year>;<volume>27</volume>:<fpage>1016</fpage>&#x2013;<lpage>1024</lpage>.
                    <pub-id pub-id-type="doi">10.11591/ijeecs.v27.i2.pp1016-1024</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref24">
                <label>24</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Debener</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Heinke</surname>
                            <given-names>V</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Kriebel</surname>
                            <given-names>J</given-names>
                        </name>
</person-group>:
                    <article-title>Detecting insurance fraud using supervised and unsupervised machine learning.</article-title>
                    <source>

                        <italic toggle="yes">J. Risk Insur.</italic>
</source>
                    <year>2023</year>;<volume>90</volume>:<fpage>743</fpage>&#x2013;<lpage>768</lpage>.
                    <pub-id pub-id-type="doi">10.1111/jori.12427</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref25">
                <label>25</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Carcillo</surname>
                            <given-names>F</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Le Borgne</surname>
                            <given-names>Y-A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Caelen</surname>
                            <given-names>O</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Combining unsupervised and supervised learning in credit card fraud detection.</article-title>
                    <source>

                        <italic toggle="yes">Inf. Sci.</italic>
</source>
                    <year>2021</year>;<volume>557</volume>:<fpage>317</fpage>&#x2013;<lpage>331</lpage>.
                    <pub-id pub-id-type="doi">10.1016/j.ins.2019.05.042</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref26">
                <label>26</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Nassif</surname>
                            <given-names>AB</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Talib</surname>
                            <given-names>MA</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Nasir</surname>
                            <given-names>Q</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Machine Learning for Anomaly Detection: A Systematic Review.</article-title>
                    <source>

                        <italic toggle="yes">IEEE Access.</italic>
</source>
                    <year>2021</year>;<volume>9</volume>:<fpage>78658</fpage>&#x2013;<lpage>78700</lpage>.
                    <pub-id pub-id-type="doi">10.1109/ACCESS.2021.3083060</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref27">
                <label>27</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Benedek</surname>
                            <given-names>B</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ciumas</surname>
                            <given-names>C</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Nagy</surname>
                            <given-names>BZ</given-names>
                        </name>
</person-group>:
                    <article-title>Automobile insurance fraud detection in the age of big data &#x2013; a systematic and comprehensive literature review.</article-title>
                    <source>

                        <italic toggle="yes">JFRC.</italic>
</source>
                    <year>2022</year>;<volume>30</volume>:<fpage>503</fpage>&#x2013;<lpage>523</lpage>.
                    <pub-id pub-id-type="doi">10.1108/JFRC-11-2021-0102</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref28">
                <label>28</label>
                <mixed-citation publication-type="journal">
                    <article-title>Fraud Guard: A Comprehensive Comparative Analysis of Machine Learning Approaches to Enhance Credit Card Fraud Detection.</article-title>
                    <source>

                        <italic toggle="yes">JIEA.</italic>
</source>
                    <year>2024</year>.
                    <pub-id pub-id-type="doi">10.7176/JIEA/14-2-02</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref29">
                <label>29</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Sulaiman</surname>
                            <given-names>SS</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Nadher</surname>
                            <given-names>I</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Hameed</surname>
                            <given-names>SM</given-names>
                        </name>
</person-group>:
                    <article-title>Credit Card Fraud Detection Using Improved Deep Learning Models.</article-title>
                    <source>

                        <italic toggle="yes">CMC.</italic>
</source>
                    <year>2024</year>;<volume>78</volume>:<fpage>1049</fpage>&#x2013;<lpage>1069</lpage>.
                    <pub-id pub-id-type="doi">10.32604/cmc.2023.046051</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref30">
                <label>30</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Lai</surname>
                            <given-names>G</given-names>
                        </name>
</person-group>:
                    <article-title>Artificial Intelligence Techniques for Fraud Detection.</article-title>
                    <year>2023</year>.
                    <pub-id pub-id-type="doi">10.20944/preprints202312.1115.v1</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref31">
                <label>31</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Adelakun</surname>
                            <given-names>BO</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Onwubuariri</surname>
                            <given-names>ER</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Adeniran</surname>
                            <given-names>GA</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Enhancing fraud detection in accounting through AI: Techniques and case studies.</article-title>
                    <source>

                        <italic toggle="yes">Financ. Account Res. J.</italic>
</source>
                    <year>2024</year>;<volume>6</volume>:<fpage>978</fpage>&#x2013;<lpage>999</lpage>.
                    <pub-id pub-id-type="doi">10.51594/farj.v6i6.1232</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref32">
                <label>32</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Esmaeili</surname>
                            <given-names>F</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Cassie</surname>
                            <given-names>E</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Nguyen</surname>
                            <given-names>HPT</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Anomaly Detection for Sensor Signals Utilizing Deep Learning Autoencoder-Based Neural Networks.</article-title>
                    <source>

                        <italic toggle="yes">Bioengineering.</italic>
</source>
                    <year>2023</year>;<volume>10</volume>:<fpage>405</fpage>.
                    <pub-id pub-id-type="pmid">37106591</pub-id>
                    <pub-id pub-id-type="doi">10.3390/bioengineering10040405</pub-id>
                    <pub-id pub-id-type="pmcid">PMC10136265</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref33">
                <label>33</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Park</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Adosoglou</surname>
                            <given-names>G</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Pardalos</surname>
                            <given-names>PM</given-names>
                        </name>
</person-group>:
                    <article-title>Interpreting Rate-Distortion of Variational Autoencoder and Using Model Uncertainty for Anomaly Detection.</article-title>
                    <year>2020</year>.
                    <pub-id pub-id-type="doi">10.48550/ARXIV.2005.01889</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref34">
                <label>34</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Fraser</surname>
                            <given-names>K</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Homiller</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Mishra</surname>
                            <given-names>RK</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Challenges for Unsupervised Anomaly Detection in Particle Physics.</article-title>
                    <year>2021</year>.
                    <pub-id pub-id-type="doi">10.48550/ARXIV.2110.06948</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref35">
                <label>35</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Kim</surname>
                            <given-names>Y-G</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Park</surname>
                            <given-names>T-H</given-names>
                        </name>
</person-group>:
                    <article-title>Anomaly Detection Using Autoencoder With Feature Vector Frequency Map.</article-title>
                    <source>

                        <italic toggle="yes">IEEE Access.</italic>
</source>
                    <year>2021</year>;<volume>9</volume>:<fpage>73808</fpage>&#x2013;<lpage>73817</lpage>.
                    <pub-id pub-id-type="doi">10.1109/ACCESS.2021.3080330</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref36">
                <label>36</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Zhu</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Jiang</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Liu</surname>
                            <given-names>Z</given-names>
                        </name>
</person-group>:
                    <article-title>Fault Detection and Diagnosis in Industrial Processes with Variational Autoencoder: A Comprehensive Study.</article-title>
                    <source>

                        <italic toggle="yes">Sensors.</italic>
</source>
                    <year>2021</year>;<volume>22</volume>:<fpage>227</fpage>.
                    <pub-id pub-id-type="pmid">35009769</pub-id>
                    <pub-id pub-id-type="doi">10.3390/s22010227</pub-id>
                    <pub-id pub-id-type="pmcid">PMC8749793</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref37">
                <label>37</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Ikeda</surname>
                            <given-names>C</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ouazzane</surname>
                            <given-names>K</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Yu</surname>
                            <given-names>Q</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>New Feature Engineering Framework for Deep Learning in Financial Fraud Detection.</article-title>
                    <source>

                        <italic toggle="yes">IJACSA.</italic>
</source>
                    <year>2021</year>;<volume>12</volume>.
                    <pub-id pub-id-type="doi">10.14569/IJACSA.2021.0121202</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref38">
                <label>38</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Rosley</surname>
                            <given-names>N</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Tong</surname>
                            <given-names>G-K</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ng</surname>
                            <given-names>K-H</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <chapter-title>Autoencoders with Reconstruction Error and Dimensionality Reduction for Credit Card Fraud Detection.</chapter-title>
                    <person-group person-group-type="editor">

                        <name name-style="western">
                            <surname>Haw</surname>
                            <given-names>S-C</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Sonai Muthu</surname>
                            <given-names>K</given-names>
                        </name>
</person-group>, editors.
                    <source>

                        <italic toggle="yes">Proceedings of the International Conference on Computer, Information Technology and Intelligent Computing (CITIC 2022).</italic>
</source>
                    <publisher-loc>Dordrecht</publisher-loc>:
                    <publisher-name>Atlantis Press International BV</publisher-name>;<year>2022</year>; pp.<fpage>503</fpage>&#x2013;<lpage>512</lpage>.
                    <pub-id pub-id-type="doi">10.2991/978-94-6463-094-7_40</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref39">
                <label>39</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Salekshahrezaee</surname>
                            <given-names>Z</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Leevy</surname>
                            <given-names>JL</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Khoshgoftaar</surname>
                            <given-names>TM</given-names>
                        </name>
</person-group>:
                    <article-title>The effect of feature extraction and data sampling on credit card fraud detection.</article-title>
                    <source>

                        <italic toggle="yes">J. Big Data.</italic>
</source>
                    <year>2023</year>;<volume>10</volume>:<fpage>6</fpage>.
                    <pub-id pub-id-type="doi">10.1186/s40537-023-00684-w</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref40">
                <label>40</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Lin</surname>
                            <given-names>T-H</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Jiang</surname>
                            <given-names>J-R</given-names>
                        </name>
</person-group>:
                    <article-title>Credit Card Fraud Detection with Autoencoder and Probabilistic Random Forest.</article-title>
                    <source>

                        <italic toggle="yes">Mathematics.</italic>
</source>
                    <year>2021</year>;<volume>9</volume>:<fpage>2683</fpage>.
                    <pub-id pub-id-type="doi">10.3390/math9212683</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref41">
                <label>41</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Prabha</surname>
                            <given-names>DP</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Priscilla</surname>
                            <given-names>CV</given-names>
                        </name>
</person-group>:
                    <article-title>Probabilistic XGBoost Threshold Classification with Autoencoder for Credit Card Fraud Detection.</article-title>
                    <source>

                        <italic toggle="yes">IJRITCC.</italic>
</source>
                    <year>2023</year>;<volume>11</volume>:<fpage>528</fpage>&#x2013;<lpage>537</lpage>.
                    <pub-id pub-id-type="doi">10.17762/ijritcc.v11i8s.7234</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref42">
                <label>42</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Gomes</surname>
                            <given-names>C</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Jin</surname>
                            <given-names>Z</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Yang</surname>
                            <given-names>H</given-names>
                        </name>
</person-group>:
                    <article-title>Insurance fraud detection with unsupervised deep learning.</article-title>
                    <source>

                        <italic toggle="yes">J. Risk Insur.</italic>
</source>
                    <year>2021</year>;<volume>88</volume>:<fpage>591</fpage>&#x2013;<lpage>624</lpage>.
                    <pub-id pub-id-type="doi">10.1111/jori.12359</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref43">
                <label>43</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Bulut</surname>
                            <given-names>O</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Gorgun</surname>
                            <given-names>G</given-names>
                        </name>

                        <name name-style="western">
                            <surname>He</surname>
                            <given-names>S</given-names>
                        </name>
</person-group>:
                    <article-title>Unsupervised Anomaly Detection in Sequential Process Data: Insights From PIAAC Problem-Solving Tasks.</article-title>
                    <source>

                        <italic toggle="yes">Z. Psychol.</italic>
</source>
                    <year>2024</year>;<volume>232</volume>:<fpage>74</fpage>&#x2013;<lpage>94</lpage>.
                    <pub-id pub-id-type="doi">10.1027/2151-2604/a000558</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref44">
                <label>44</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Mohamed Elmahalwy</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Mousa</surname>
                            <given-names>HM</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Amin</surname>
                            <given-names>KM</given-names>
                        </name>
</person-group>:
                    <article-title>New hybrid ensemble method for anomaly detection in data science.</article-title>
                    <source>

                        <italic toggle="yes">IJECE.</italic>
</source>
                    <year>2023</year>;<volume>13</volume>:<fpage>3498</fpage>&#x2013;<lpage>3508</lpage>.
                    <pub-id pub-id-type="doi">10.11591/ijece.v13i3.pp3498-3508</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref45">
                <label>45</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Feng</surname>
                            <given-names>B</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Zhang</surname>
                            <given-names>L</given-names>
                        </name>
</person-group>:
                    <article-title>Optimizing the Isolation Forest Algorithm for Identifying Abnormal Behaviors of Students in Education Management Big Data.</article-title>
                    <source>

                        <italic toggle="yes">JAIT.</italic>
</source>
                    <year>2023</year>.
                    <pub-id pub-id-type="doi">10.37965/jait.2023.0445</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref46">
                <label>46</label>
                <mixed-citation publication-type="journal">
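                    <!-- Author affiliation that had been fused into the title: Research Scholar, Department of Computer Science, Karpagam Academy of Higher Education, Coimbatore, 641 021, Tamil Nadu, India -->
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Prajesha</surname>
                            <given-names>TM</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Veni</surname>
                            <given-names>S</given-names>
                        </name>
</person-group>:
                    <article-title>An Efficient Outlier Detection Using Isolation Forest Based on Robust Scaling and Principal Component Analysis for the Prediction of Anxiety Disorder.</article-title>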
                    <source>

                        <italic toggle="yes">IJST.</italic>
</source>
                    <year>2023</year>;<volume>16</volume>:<fpage>2244</fpage>&#x2013;<lpage>2251</lpage>.
                    <pub-id pub-id-type="doi">10.17485/IJST/v16i29.638</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref47">
                <label>47</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Fang</surname>
                            <given-names>N</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Fang</surname>
                            <given-names>X</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Lu</surname>
                            <given-names>K</given-names>
                        </name>
</person-group>:
                    <article-title>Anomalous Behavior Detection Based on the Isolation Forest Model with Multiple Perspective Business Processes.</article-title>
                    <source>

                        <italic toggle="yes">Electronics.</italic>
</source>
                    <year>2022</year>;<volume>11</volume>:<fpage>3640</fpage>.
                    <pub-id pub-id-type="doi">10.3390/electronics11213640</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref48">
                <label>48</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Hadi</surname>
                            <given-names>MU</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Tashi</surname>
                            <given-names>QA</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Qureshi</surname>
                            <given-names>R</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>A Survey on Large Language Models: Applications, Challenges, Limitations, and Practical Usage.</article-title>
                    <year>2023</year>.
                    <pub-id pub-id-type="doi">10.36227/techrxiv.23589741.v1</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref49">
                <label>49</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Meduri</surname>
                            <given-names>K</given-names>
                        </name>
</person-group>:
                    <article-title>Cybersecurity threats in banking: Unsupervised fraud detection analysis.</article-title>
                    <source>

                        <italic toggle="yes">Int. J. Sci. Res. Arch.</italic>
</source>
                    <year>2024</year>;<volume>11</volume>:<fpage>915</fpage>&#x2013;<lpage>925</lpage>.
                    <pub-id pub-id-type="doi">10.30574/ijsra.2024.11.2.0505</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref50">
                <label>50</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Liu</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Wang</surname>
                            <given-names>Y</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Sun</surname>
                            <given-names>Y</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Preparation and Optimization of Mesoporous SnO
                        <sub>2</sub> Quantum Dot Thin Film Gas Sensors for H
                        <sub>2</sub>S Detection Using XGBoost Parameter Importance Analysis.</article-title>
                    <source>

                        <italic toggle="yes">Chemosensors.</italic>
</source>
                    <year>2023</year>;<volume>11</volume>:<fpage>525</fpage>.
                    <pub-id pub-id-type="doi">10.3390/chemosensors11100525</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref51">
                <label>51</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Shi</surname>
                            <given-names>F</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Lu</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Gu</surname>
                            <given-names>J</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Modeling and Evaluation of the Permeate Flux in Forward Osmosis Process with Machine Learning.</article-title>
                    <source>

                        <italic toggle="yes">Ind. Eng. Chem. Res.</italic>
</source>
                    <year>2022</year>;<volume>61</volume>:<fpage>18045</fpage>&#x2013;<lpage>18056</lpage>.
                    <pub-id pub-id-type="doi">10.1021/acs.iecr.2c03064</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref52">
                <label>52</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Wang</surname>
                            <given-names>X</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ding</surname>
                            <given-names>C</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Chen</surname>
                            <given-names>T</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Research on the Application of Bayesian-Optimized XGBoost in Minor Faults in Coalfields.</article-title>
                    <source>

                        <italic toggle="yes">Math. Probl. Eng.</italic>
</source>
                    <year>2022</year>;<volume>2022</volume>:<fpage>1</fpage>&#x2013;<lpage>13</lpage>.
                    <pub-id pub-id-type="doi">10.1155/2022/3409468</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref53">
                <label>53</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Nam</surname>
                            <given-names>SM</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Peterson</surname>
                            <given-names>TA</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Seo</surname>
                            <given-names>KY</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Discovery of Depression-Associated Factors From a Nationwide Population-Based Survey: Epidemiological Study Using Machine Learning and Network Analysis.</article-title>
                    <source>

                        <italic toggle="yes">J. Med. Internet Res.</italic>
</source>
                    <year>2021</year>;<volume>23</volume>:<fpage>e27344</fpage>.
                    <pub-id pub-id-type="pmid">34184998</pub-id>
                    <pub-id pub-id-type="doi">10.2196/27344</pub-id>
                    <pub-id pub-id-type="pmcid">PMC8277318</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref54">
                <label>54</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Huang</surname>
                            <given-names>P</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Yan</surname>
                            <given-names>H</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Song</surname>
                            <given-names>Z</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Combining autoencoder with clustering analysis for anomaly detection in radiotherapy plans.</article-title>
                    <source>

                        <italic toggle="yes">Quant. Imaging Med. Surg.</italic>
</source>
                    <year>2023</year>;<volume>13</volume>:<fpage>2328</fpage>&#x2013;<lpage>2338</lpage>.
                    <pub-id pub-id-type="pmid">37064364</pub-id>
                    <pub-id pub-id-type="doi">10.21037/qims-22-825</pub-id>
                    <pub-id pub-id-type="pmcid">PMC10102771</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref55">
                <label>55</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Guo</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Yuan</surname>
                            <given-names>Z</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Janson</surname>
                            <given-names>B</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Older Pedestrian Traffic Crashes Severity Analysis Based on an Emerging Machine Learning XGBoost.</article-title>
                    <source>

                        <italic toggle="yes">Sustainability.</italic>
</source>
                    <year>2021</year>;<volume>13</volume>:<fpage>926</fpage>.
                    <pub-id pub-id-type="doi">10.3390/su13020926</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref56">
                <label>56</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Patel</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Singh</surname>
                            <given-names>G</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Zarbiv</surname>
                            <given-names>S</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Mortality Prediction Using SaO
                        <sub>2</sub>/FiO
                        <sub>2</sub> Ratio Based on eICU Database Analysis.</article-title>
                    <source>

                        <italic toggle="yes">Crit. Care Res. Prac.</italic>
</source>
                    <year>2021</year>;<volume>2021</volume>:<fpage>1</fpage>&#x2013;<lpage>9</lpage>.
                    <pub-id pub-id-type="pmid">34790417</pub-id>
                    <pub-id pub-id-type="doi">10.1155/2021/6672603</pub-id>
                    <pub-id pub-id-type="pmcid">PMC8592728</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref57">
                <label>57</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Ru</surname>
                            <given-names>B</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Kujawski</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Lee Afanador</surname>
                            <given-names>N</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Predicting Measles Outbreaks in the United States: Evaluation of Machine Learning Approaches (Preprint).</article-title>
                    <year>2022</year>.
                    <pub-id pub-id-type="doi">10.2196/preprints.42832</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref58">
                <label>58</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Esmaeilzadeh</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Salajegheh</surname>
                            <given-names>N</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ziai</surname>
                            <given-names>A</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Abuse and Fraud Detection in Streaming Services Using Heuristic-Aware Machine Learning.</article-title>
                    <year>2022</year>.
                    <pub-id pub-id-type="doi">10.48550/ARXIV.2203.02124</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref59">
                <label>59</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Dong</surname>
                            <given-names>Y</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Chen</surname>
                            <given-names>K</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Peng</surname>
                            <given-names>Y</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <chapter-title>Comparative Study on Supervised versus Semi-supervised Machine Learning for Anomaly Detection of In-vehicle CAN Network.</chapter-title>
                    <source>

                        <italic toggle="yes">2022 IEEE 25th International Conference on Intelligent Transportation Systems (ITSC).</italic>
</source>
                    <publisher-loc>Macau, China</publisher-loc>:
                    <publisher-name>IEEE</publisher-name>;<year>2022</year>; pp.<fpage>2914</fpage>&#x2013;<lpage>2919</lpage>.
                    <pub-id pub-id-type="doi">10.1109/ITSC55140.2022.9922235</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref60">
                <label>60</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Bakumenko</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Elragal</surname>
                            <given-names>A</given-names>
                        </name>
</person-group>:
                    <article-title>Detecting Anomalies in Financial Data Using Machine Learning Algorithms.</article-title>
                    <source>

                        <italic toggle="yes">Systems.</italic>
</source>
                    <year>2022</year>;<volume>10</volume>:<fpage>130</fpage>.
                    <pub-id pub-id-type="doi">10.3390/systems10050130</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref61">
                <label>61</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Aghaee</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Krau</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Tamer</surname>
                            <given-names>IM</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Unsupervised Hybrid Models Integrating Deep Autoencoders and Process Controllers&#x2019; Models for Enhanced Process Monitoring and Fault Detection.</article-title>
                    <source>

                        <italic toggle="yes">Ind. Eng. Chem. Res.</italic>
</source>
                    <year>2024</year>;<volume>63</volume>:<fpage>14748</fpage>&#x2013;<lpage>14760</lpage>.
                    <pub-id pub-id-type="doi">10.1021/acs.iecr.4c01980</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref62">
                <label>62</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Zhang</surname>
                            <given-names>Y</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Huangfu</surname>
                            <given-names>Y</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ziada</surname>
                            <given-names>Y</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>A Hybrid Fault Detection Method for Hairpin Windings Integrating Physics Model and Machine Learning.</article-title>
                    <source>

                        <italic toggle="yes">IEEE Access.</italic>
</source>
                    <year>2024</year>;<volume>12</volume>:<fpage>70392</fpage>&#x2013;<lpage>70404</lpage>.
                    <pub-id pub-id-type="doi">10.1109/ACCESS.2024.3402224</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref63">
                <label>63</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Giroh</surname>
                            <given-names>H</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Kumar</surname>
                            <given-names>V</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Singh</surname>
                            <given-names>G</given-names>
                        </name>
</person-group>:
                    <article-title>Improving the Performance of Hybrid Models Using Machine Learning and Optimization Techniques.</article-title>
                    <source>

                        <italic toggle="yes">IJMST.</italic>
</source>
                    <year>2023</year>;<volume>10</volume>:<fpage>3396</fpage>&#x2013;<lpage>3409</lpage>.
                    <pub-id pub-id-type="doi">10.15379/ijmst.v10i2.3138</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref64">
                <label>64</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Albahlal</surname>
                            <given-names>B</given-names>
                        </name>
</person-group>:
                    <article-title>Emerging Technology-Driven Hybrid Models for Preventing and Monitoring Infectious Diseases: A Comprehensive Review and Conceptual Framework.</article-title>
                    <source>

                        <italic toggle="yes">Diagnostics.</italic>
</source>
                    <year>2023</year>;<volume>13</volume>:<fpage>3047</fpage>.
                    <pub-id pub-id-type="pmid">37835793</pub-id>
                    <pub-id pub-id-type="doi">10.3390/diagnostics13193047</pub-id>
                    <pub-id pub-id-type="pmcid">PMC10572974</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref65">
                <label>65</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Li</surname>
                            <given-names>N</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Chiang</surname>
                            <given-names>F</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Down</surname>
                            <given-names>DG</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>A decision integration strategy for short-term demand forecasting and ordering for red blood cell components.</article-title>
                    <year>2020</year>.
                    <pub-id pub-id-type="doi">10.48550/ARXIV.2008.07486</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref66">
                <label>66</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Liao</surname>
                            <given-names>W-W</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Hsieh</surname>
                            <given-names>Y-W</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Lee</surname>
                            <given-names>T-H</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Machine learning predicts clinically significant health related quality of life improvement after sensorimotor rehabilitation interventions in chronic stroke.</article-title>
                    <source>

                        <italic toggle="yes">Sci. Rep.</italic>
</source>
                    <year>2022</year>;<volume>12</volume>:<fpage>11235</fpage>.
                    <pub-id pub-id-type="pmid">35787657</pub-id>
                    <pub-id pub-id-type="doi">10.1038/s41598-022-14986-1</pub-id>
                    <pub-id pub-id-type="pmcid">PMC9253044</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref67">
                <label>67</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Ito</surname>
                            <given-names>G</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Yada</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Wakamiya</surname>
                            <given-names>S</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Predictive Model for Extended-Spectrum &#x03b2;-Lactamase&#x2013;Producing Bacterial Infections Using Natural Language Processing Technique and Open Data in Intensive Care Unit Environment: Retrospective Observational Study.</article-title>
                    <source>

                        <italic toggle="yes">JMIR Form Res.</italic>
</source>
                    <year>2024</year>;<volume>8</volume>:<fpage>e54044</fpage>.
                    <pub-id pub-id-type="pmid">38986131</pub-id>
                    <pub-id pub-id-type="doi">10.2196/54044</pub-id>
                    <pub-id pub-id-type="pmcid">PMC11269962</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref68">
                <label>68</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Tan</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ma</surname>
                            <given-names>W</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Sun</surname>
                            <given-names>Y</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Prediction of the Growth Rate of Early-Stage Lung Adenocarcinoma by Radiomics.</article-title>
                    <source>

                        <italic toggle="yes">Front. Oncol.</italic>
</source>
                    <year>2021</year>;<volume>11</volume>:<fpage>658138</fpage>.
                    <pub-id pub-id-type="pmid">33937070</pub-id>
                    <pub-id pub-id-type="doi">10.3389/fonc.2021.658138</pub-id>
                    <pub-id pub-id-type="pmcid">PMC8082461</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref69">
                <label>69</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Sun</surname>
                            <given-names>F</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Han</surname>
                            <given-names>B</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Wu</surname>
                            <given-names>F</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Development And Validation Of Models To Predict Cesarean Delivery Among Low-Risk Nulliparous Women At Term: A Retrospective Study In China.</article-title>
                    <year>2020</year>.
                    <pub-id pub-id-type="doi">10.21203/rs.3.rs-44296/v1</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref70">
                <label>70</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Tu</surname>
                            <given-names>K-C</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Tau</surname>
                            <given-names>ENT</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Chen</surname>
                            <given-names>N-C</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Machine Learning Algorithm Predicts Mortality Risk in Intensive Care Unit for Patients with Traumatic Brain Injury.</article-title>
                    <source>

                        <italic toggle="yes">Diagnostics.</italic>
</source>
                    <year>2023</year>;<volume>13</volume>:<fpage>3016</fpage>.
                    <pub-id pub-id-type="pmid">37761383</pub-id>
                    <pub-id pub-id-type="doi">10.3390/diagnostics13183016</pub-id>
                    <pub-id pub-id-type="pmcid">PMC10528289</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref71">
                <label>71</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Manda</surname>
                            <given-names>VT</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Kondapalli</surname>
                            <given-names>D</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Malla</surname>
                            <given-names>AS</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Imbalanced Data Challenges and Their Resolution to Improve Fraud Detection in Credit Card Transactions.</article-title>
                    <year>2024</year>.
                    <pub-id pub-id-type="doi">10.21203/rs.3.rs-3962043/v1</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref72">
                <label>72</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Esenogho</surname>
                            <given-names>E</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Mienye</surname>
                            <given-names>ID</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Swart</surname>
                            <given-names>TG</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>A Neural Network Ensemble With Feature Engineering for Improved Credit Card Fraud Detection.</article-title>
                    <source>

                        <italic toggle="yes">IEEE Access.</italic>
</source>
                    <year>2022</year>;<volume>10</volume>:<fpage>16400</fpage>&#x2013;<lpage>16407</lpage>.
                    <pub-id pub-id-type="doi">10.1109/ACCESS.2022.3148298</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref73">
                <label>73</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Sudhakar</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Kaliyamurthie</surname>
                            <given-names>KP</given-names>
                        </name>
</person-group>:
                    <article-title>A Novel Machine learning Algorithms used to Detect Credit Card Fraud Transactions.</article-title>
                    <source>

                        <italic toggle="yes">IJRITCC.</italic>
</source>
                    <year>2023</year>;<volume>11</volume>:<fpage>163</fpage>&#x2013;<lpage>168</lpage>.
                    <pub-id pub-id-type="doi">10.17762/ijritcc.v11i2.6141</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref74">
                <label>74</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Ileberi</surname>
                            <given-names>E</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Sun</surname>
                            <given-names>Y</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Wang</surname>
                            <given-names>Z</given-names>
                        </name>
</person-group>:
                    <article-title>Performance Evaluation of Machine Learning Methods for Credit Card Fraud Detection Using SMOTE and AdaBoost.</article-title>
                    <source>

                        <italic toggle="yes">IEEE Access.</italic>
</source>
                    <year>2021</year>;<volume>9</volume>:<fpage>165286</fpage>&#x2013;<lpage>165294</lpage>.
                    <pub-id pub-id-type="doi">10.1109/ACCESS.2021.3134330</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref75">
                <label>75</label>
                <mixed-citation publication-type="journal">
                    <article-title>Assessing the feasibility of machine learning-based modelling and prediction of credit fraud outcomes using hyperparameter tuning.</article-title>
                    <source>

                        <italic toggle="yes">ACSS.</italic>
</source>
                    <year>2023</year>;<volume>7</volume>.
                    <pub-id pub-id-type="doi">10.23977/acss.2023.070212</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref76">
                <label>76</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Trisanto</surname>
                            <given-names>D</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Rismawati</surname>
                            <given-names>N</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Mulya</surname>
                            <given-names>M</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Effectiveness Undersampling Method and Feature Reduction in Credit Card Fraud Detection.</article-title>
                    <source>

                        <italic toggle="yes">IJIES.</italic>
</source>
                    <year>2020</year>;<volume>13</volume>:<fpage>173</fpage>&#x2013;<lpage>181</lpage>.
                    <pub-id pub-id-type="doi">10.22266/ijies2020.0430.17</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref77">
                <label>77</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Mohammed</surname>
                            <given-names>RA</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Bazzi</surname>
                            <given-names>YA</given-names>
                        </name>
</person-group>:
                    <article-title>Implement an Intrusion Detection System Utilizing Machine Learning and Principal Component Analysis.</article-title>
                    <source>

                        <italic toggle="yes">IRJIET.</italic>
</source>
                    <year>2024</year>;<volume>8</volume>:<fpage>1</fpage>&#x2013;<lpage>7</lpage>.
                    <pub-id pub-id-type="doi">10.47001/IRJIET/2024.802001</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref78">
                <label>78</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Ezekiel</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Alshehri</surname>
                            <given-names>AA</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Pearlstein</surname>
                            <given-names>L</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>IoT Anomaly Detection using Multivariate.</article-title>
                    <source>

                        <italic toggle="yes">IJITEE.</italic>
</source>
                    <year>2020</year>;<volume>9</volume>:<fpage>1662</fpage>&#x2013;<lpage>1669</lpage>.
                    <pub-id pub-id-type="doi">10.35940/ijitee.D1323.029420</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref79">
                <label>79</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Zhu</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Zhang</surname>
                            <given-names>Y</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Gong</surname>
                            <given-names>Y</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Enhancing Credit Card Fraud Detection: A Neural Network and SMOTE Integrated Approach.</article-title>
                    <source>

                        <italic toggle="yes">JTPES.</italic>
</source>
                    <year>2024</year>;<volume>4</volume>:<fpage>23</fpage>&#x2013;<lpage>30</lpage>.
                    <pub-id pub-id-type="doi">10.53469/jtpes.2024.04(02).04</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref80">
                <label>80</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Ding</surname>
                            <given-names>L</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Liu</surname>
                            <given-names>L</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Wang</surname>
                            <given-names>Y</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>An AutoEncoder enhanced light gradient boosting machine method for credit card fraud detection.</article-title>
                    <source>

                        <italic toggle="yes">PeerJ Comput. Sci.</italic>
</source>
                    <year>2024</year>;<volume>10</volume>:<fpage>e2323</fpage>.
                    <pub-id pub-id-type="pmid">39650410</pub-id>
                    <pub-id pub-id-type="doi">10.7717/peerj-cs.2323</pub-id>
                    <pub-id pub-id-type="pmcid">PMC11623290</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref81">
                <label>81</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Du</surname>
                            <given-names>H</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Lv</surname>
                            <given-names>L</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Wang</surname>
                            <given-names>H</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>A novel method for detecting credit card fraud problems.</article-title>
                    <source>

                        <italic toggle="yes">PLoS ONE.</italic>
</source>
                    <year>2024</year>;<volume>19</volume>:<fpage>e0294537</fpage>.
                    <pub-id pub-id-type="pmid">38446831</pub-id>
                    <pub-id pub-id-type="doi">10.1371/journal.pone.0294537</pub-id>
                    <pub-id pub-id-type="pmcid">PMC10917329</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref82">
                <label>82</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Alshameri</surname>
                            <given-names>F</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Xia</surname>
                            <given-names>R</given-names>
                        </name>
</person-group>:
                    <article-title>An Evaluation of Variational Autoencoder in Credit Card Anomaly Detection.</article-title>
                    <source>

                        <italic toggle="yes">Big Data Min. Anal.</italic>
</source>
                    <year>2024</year>;<volume>7</volume>:<fpage>718</fpage>&#x2013;<lpage>729</lpage>.
                    <pub-id pub-id-type="doi">10.26599/BDMA.2023.9020035</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref83">
                <label>83</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Wu</surname>
                            <given-names>T</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Wang</surname>
                            <given-names>Y</given-names>
                        </name>
</person-group>:
                    <article-title>Locally Interpretable One-Class Anomaly Detection for Credit Card Fraud Detection.</article-title>
                    <year>2022</year>.
                    <pub-id pub-id-type="doi">10.48550/arXiv.2108.02501</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref84">
                <label>84</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Ishak</surname>
                            <given-names>NA</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ng</surname>
                            <given-names>K-H</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Tong</surname>
                            <given-names>G-K</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Mitigating unbalanced and overlapped classes in credit card fraud data with enhanced stacking classifiers system.</article-title>
                    <source>

                        <italic toggle="yes">F1000Res.</italic>
</source>
                    <year>2022</year>;<volume>11</volume>:<fpage>71</fpage>.
                    <pub-id pub-id-type="doi">10.12688/f1000research.73359.1</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref85">
                <label>85</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Benchaji</surname>
                            <given-names>I</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Douzi</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>El Ouahidi</surname>
                            <given-names>B</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Enhanced credit card fraud detection based on attention mechanism and LSTM deep model.</article-title>
                    <source>

                        <italic toggle="yes">J. Big Data.</italic>
</source>
                    <year>2021</year>;<volume>8</volume>:<fpage>151</fpage>.
                    <pub-id pub-id-type="doi">10.1186/s40537-021-00541-8</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref86">
                <label>86</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Shanaa</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Abdallah</surname>
                            <given-names>S</given-names>
                        </name>
</person-group>:
                    <article-title>XRAI: A Hybrid Anomaly Detection Framework for Credit Card Fraud Detection.</article-title>
                    <year>2025</year>.
                    <pub-id pub-id-type="doi">10.5281/ZENODO.15626193</pub-id>
                </mixed-citation>
            </ref>
        </ref-list>
    </back>
    <sub-article article-type="reviewer-report" id="report432010">
        <front-stub>
            <article-id pub-id-type="doi">10.5256/f1000research.183325.r432010</article-id>
            <title-group>
                <article-title>Reviewer response for version 1</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author">
                    <name>
                        <surname>Palit</surname>
                        <given-names>Shamik</given-names>
                    </name>
                    <xref ref-type="aff" rid="r432010a1">1</xref>
                    <role>Referee</role>
                    <uri content-type="orcid">https://orcid.org/0000-0002-2999-2408</uri>
                </contrib>
                <aff id="r432010a1">
                    <label>1</label>University of Stirling RAK Campus, Ras Al Khaimah, Ras Al Khaimah, United Arab Emirates</aff>
            </contrib-group>
            <author-notes>
                <fn fn-type="conflict">
                    <p>
                        <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>27</day>
                <month>11</month>
                <year>2025</year>
            </pub-date>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2025 Palit S</copyright-statement>
                <copyright-year>2025</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access peer review report distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <related-article ext-link-type="doi" id="relatedArticleReport432010" related-article-type="peer-reviewed-article" xlink:href="10.12688/f1000research.166350.1"/>
            <custom-meta-group>
                <custom-meta>
                    <meta-name>recommendation</meta-name>
                    <meta-value>approve-with-reservations</meta-value>
                </custom-meta>
            </custom-meta-group>
        </front-stub>
        <body>
            <p>Exact hyperparameter configuration for each model. 
                <list list-type="bullet">
                    <list-item>
                        <p>XGBoost: number of trees, max_depth, learning_rate, subsampling, regularization parameters, etc.</p>
                    </list-item>
                    <list-item>
                        <p>Random Forest: n_estimators, max_features, max_depth, class_weight (if any).</p>
                    </list-item>
                    <list-item>
                        <p>Autoencoder: architecture (layers, hidden sizes, activation functions), optimizer, learning rate, number of epochs, batch size, reconstruction threshold selection.</p>
                    </list-item>
                    <list-item>
                        <p>Isolation Forest: n_estimators, max_samples, contamination, max_features.</p>
                    </list-item>
                </list> Precise description of train/validation/test splitting. 
                <list list-type="bullet">
                    <list-item>
                        <p>Is there a single held-out test set?</p>
                    </list-item>
                    <list-item>
                        <p>Was cross-validation used? If yes, k-fold or repeated stratified?</p>
                    </list-item>
                    <list-item>
                        <p>How is random seeding handled?</p>
                    </list-item>
                </list> Details of how BorderlineSMOTE is applied. 
                <list list-type="bullet">
                    <list-item>
                        <p>Confirm explicitly that SMOTE is applied only on the training folds and 
                            <italic>not</italic> on the test set (to avoid data leakage).</p>
                    </list-item>
                    <list-item>
                        <p>Clarify whether SMOTE is applied before or inside cross-validation loops.</p>
                    </list-item>
                </list> Mathematical or algorithmic description of the hybrid XRAI weighting scheme. 
                <list list-type="bullet">
                    <list-item>
                        <p>How are the outputs of XGBoost, RF, AE, IF combined? 
                            <list list-type="bullet">
                                <list-item>
                                    <p>Simple average? Weighted sum? Threshold on each then voting?</p>
                                </list-item>
                            </list> </p>
                    </list-item>
                    <list-item>
                        <p>How are the weights chosen? 
                            <list list-type="bullet">
                                <list-item>
                                    <p>Manually tuned? Based on validation metrics? Grid search?</p>
                                </list-item>
                            </list> </p>
                    </list-item>
                </list> Conceptually and experimentally, the paper is strong, well-motivated, and technically sound. The main remaining gaps are in the level of detail of the methods, particularly around hyperparameters, data splitting, resampling protocol, and ensemble weighting. Once these are clarified, the work will be scientifically solid and reproducible, and in my view, suitable for publication in an applied machine learning or fintech/security venue.</p>
            <p>Is the work clearly and accurately presented and does it cite the current literature?</p>
            <p>Yes</p>
            <p>If applicable, is the statistical analysis and its interpretation appropriate?</p>
            <p>Yes</p>
            <p>Are all the source data underlying the results available to ensure full reproducibility?</p>
            <p>Partly</p>
            <p>Is the study design appropriate and is the work technically sound?</p>
            <p>Yes</p>
            <p>Are the conclusions drawn adequately supported by the results?</p>
            <p>Yes</p>
            <p>Are sufficient details of methods and analysis provided to allow replication by others?</p>
            <p>Partly</p>
            <p>Reviewer Expertise:</p>
            <p>Machine Learning</p>
            <p>I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard; however, I have significant reservations, as outlined above.</p>
        </body>
        <sub-article article-type="response" id="comment15035-432010">
            <front-stub>
                <contrib-group>
                    <contrib contrib-type="author">
                        <name>
                            <surname>Shanaa</surname>
                            <given-names>Mohammad</given-names>
                        </name>
                        <aff>Computer Science, The British University in Dubai, Dubai, Dubai, United Arab Emirates</aff>
                    </contrib>
                </contrib-group>
                <author-notes>
                    <fn fn-type="conflict">
                        <p>
                            <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                    </fn>
                </author-notes>
                <pub-date pub-type="epub">
                    <day>7</day>
                    <month>12</month>
                    <year>2025</year>
                </pub-date>
            </front-stub>
            <body>
                <p>
                    <bold>1) Exact hyperparameter configuration for each model</bold>
                </p>
                <p> 
                    <bold>Response:</bold>
                </p>
                <p> </p>
                <p> The full and exact configurations are available in the public GitHub repository 
                    <ext-link ext-link-type="uri" xlink:href="https://github.com/mohshanaa/XRAI.git">https://github.com/mohshanaa/XRAI.git</ext-link>
                </p>
                <p> Direct link:</p>
                <p> 
                    <ext-link ext-link-type="uri" xlink:href="https://github.com/mohshanaa/XRAI/blob/main/Configurations">https://github.com/mohshanaa/XRAI/blob/main/Configurations</ext-link>
                </p>
                <p> </p>
                <p> </p>
                <p> 
                    <bold>2) Precise description of train/validation/test splitting</bold>
                </p>
                <p> 
                    <bold>Response:</bold>
                </p>
                <p> The study uses a 
                    <bold>single stratified 70/30 train&#x2013;test split</bold> of the creditcard.csv dataset. 
                    <list list-type="bullet">
                        <list-item>
                            <p>
                                <bold>70%</bold> of the data is used for model development (training + internal validation as needed).</p>
                        </list-item>
                        <list-item>
                            <p>
                                <bold>30%</bold> is held out 
                                <bold>once</bold> as the final test set.</p>
                        </list-item>
                    </list> Stratification ensures the fraud ratio is preserved in both subsets.</p>
                <p> </p>
                <p> </p>
                <p> 
                    <bold>3) Is there a single held-out test set?</bold>
                </p>
                <p> 
                    <bold>Response:</bold>
                </p>
                <p> Yes. A 
                    <bold>single stratified 30% test set</bold> was held out and never used during training, resampling, weight tuning, or threshold selection. All reported performance metrics are computed on this untouched 30% test portion.</p>
                <p> </p>
                <p> 
                    <bold>4) Was cross-validation used?</bold>
                </p>
                <p> 
                    <bold>Response:</bold>
                </p>
                <p> No. The experiments used a 
                    <bold>single 70/30 stratified hold-out split only</bold>.</p>
                <p> No k-fold, repeated k-fold, or stratified cross-validation was used.</p>
                <p> Any internal tuning was performed on the training portion of the split.</p>
                <p> </p>
                <p> 
                    <bold>5) How is random seeding handled?</bold>
                </p>
                <p> 
                    <bold>Response:</bold>
                </p>
                <p> A consistent 
                    <bold>random_state = 42</bold> was applied to: 
                    <list list-type="bullet">
                        <list-item>
                            <p>the stratified 70/30 split,</p>
                        </list-item>
                        <list-item>
                            <p>BorderlineSMOTE,</p>
                        </list-item>
                        <list-item>
                            <p>stochastic models (Random Forest, XGBoost, Isolation Forest),</p>
                        </list-item>
                        <list-item>
                            <p>and Autoencoder initialization (where applicable).</p>
                        </list-item>
                    </list> This ensures full reproducibility. The seed usage is visible in the GitHub code.</p>
                <p> </p>
                <p> </p>
                <p> 
                    <bold>6) Details of how BorderlineSMOTE is applied</bold>
                </p>
                <p> 
                    <bold>Response:</bold>
                </p>
                <p> The procedure is: 
                    <list list-type="order">
                        <list-item>
                            <p>Perform a 
                                <bold>stratified 70/30 split</bold>.</p>
                        </list-item>
                        <list-item>
                            <p>Apply 
                                <bold>BorderlineSMOTE only to the training 70% subset</bold> (for supervised models).</p>
                        </list-item>
                        <list-item>
                            <p>Leave the 
                                <bold>30% test set untouched</bold>.</p>
                        </list-item>
                        <list-item>
                            <p>Train unsupervised models (Autoencoder, Isolation Forest) on 
                                <bold>non-SMOTE data</bold>, preserving natural anomaly structure.</p>
                        </list-item>
                    </list> This avoids leakage and preserves anomaly boundaries for unsupervised models.</p>
                <p> </p>
                <p> </p>
                <p> 
                    <bold>7) Confirm SMOTE is applied only on the training set</bold>
                </p>
                <p> 
                    <bold>Response:</bold>
                </p>
                <p> We confirm that 
                    <bold>BorderlineSMOTE is applied exclusively to the training 70% subset</bold>.</p>
                <p> The 30% test set is 
                    <italic>never</italic> oversampled or modified.</p>
                <p> No SMOTE-generated samples ever enter evaluation.</p>
                <p> </p>
                <p> </p>
                <p> 
                    <bold>8) Clarify whether SMOTE is applied before or inside cross-validation loops</bold>
                </p>
                <p> 
                    <bold>Response:</bold>
                </p>
                <p> Since 
                    <bold>cross-validation was not used</bold>, BorderlineSMOTE was 
                    <bold>not</bold> applied inside any CV loop.</p>
                <p> It is applied 
                    <bold>only once</bold>, after the 70/30 split, and 
                    <bold>only on the training set</bold>.</p>
                <p> </p>
                <p> </p>
                <p> 
                    <bold>9) How are model outputs combined? Weighted sum? Voting?</bold>
                </p>
                <p> 
                    <bold>Response:</bold>
                </p>
                <p> XRAI uses a 
                    <bold>weighted sum of normalized scores</bold>, not voting or averaging. 
                    <list list-type="bullet">
                        <list-item>
                            <p>XGBoost &amp; RF: use predicted fraud probability</p>
                        </list-item>
                        <list-item>
                            <p>Autoencoder: uses normalized reconstruction error</p>
                        </list-item>
                        <list-item>
                            <p>Isolation Forest: uses normalized anomaly score</p>
                        </list-item>
                    </list> All four are scaled to [0, 1], weighted, summed, and thresholded.</p>
                <p> </p>
                <p> 
                    <bold>10) How were the weights chosen?</bold>
                </p>
                <p> 
                    <bold>Response:</bold>
                </p>
                <p> Weights were chosen empirically based on the behavior of the individual models and the overall ensemble performance. Higher weights were assigned to XGBoost and Random Forest, which exhibited stronger precision and stability, while lower weights were assigned to the Autoencoder and Isolation Forest to preserve anomaly sensitivity without allowing noisy alerts to dominate the ensemble. The final chosen weights and their implementation are documented in the publicly available GitHub repository.</p>
            </body>
        </sub-article>
    </sub-article>
    <sub-article article-type="reviewer-report" id="report398688">
        <front-stub>
            <article-id pub-id-type="doi">10.5256/f1000research.183325.r398688</article-id>
            <title-group>
                <article-title>Reviewer response for version 1</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author">
                    <name>
                        <surname>Paldino</surname>
                        <given-names>Gian Marco</given-names>
                    </name>
                    <xref ref-type="aff" rid="r398688a1">1</xref>
                    <role>Referee</role>
                    <uri content-type="orcid">https://orcid.org/0000-0002-8680-9403</uri>
                </contrib>
                <aff id="r398688a1">
                    <label>1</label>Universit&#x00e9; Libre de Bruxelles, Brussels, Belgium</aff>
            </contrib-group>
            <author-notes>
                <fn fn-type="conflict">
                    <p>
                        <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>26</day>
                <month>8</month>
                <year>2025</year>
            </pub-date>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2025 Paldino GM</copyright-statement>
                <copyright-year>2025</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access peer review report distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <related-article ext-link-type="doi" id="relatedArticleReport398688" related-article-type="peer-reviewed-article" xlink:href="10.12688/f1000research.166350.1"/>
            <custom-meta-group>
                <custom-meta>
                    <meta-name>recommendation</meta-name>
                    <meta-value>reject</meta-value>
                </custom-meta>
            </custom-meta-group>
        </front-stub>
        <body>
            <p>The manuscript presents a hybrid model, named XRAI, for credit card fraud detection. The model combines two supervised (XGBoost, Random Forest) and two unsupervised (Autoencoder, Isolation Forest) algorithms, reporting superior performance on the public Kaggle&#x00a0;creditcard.csv&#x00a0;dataset. The problem of fraud detection is of significant practical and academic importance, and the authors' effort to develop a high-performance solution is commendable.</p>
            <p> However, the manuscript in its current form suffers from several major methodological and conceptual issues that must be addressed before it can be considered for indexing. The core concerns relate to the justification of the preprocessing pipeline, the rationale for the ensemble's architecture, and the practical significance of the model's contribution.</p>
            <p> Major Concerns 
                <list list-type="order">
                    <list-item>
                        <p>Fundamental Flaw in Preprocessing Methodology:&#x00a0;A significant methodological concern is the application of Principal Component Analysis (PCA) for dimensionality reduction. The&#x00a0;creditcard.csv&#x00a0;dataset's primary features (V1-V28) are already the result of a PCA transformation, a fact the authors acknowledge. Applying PCA again to these components is conceptually flawed, as it assumes the components are correlated in a way that allows for further linear dimensionality reduction, which is not guaranteed and highly unusual. This step demonstrates a misunderstanding of the dataset's nature and potentially distorts the data's inherent structure. The authors must either remove this step or provide a strong theoretical justification for this unconventional approach.</p>
                    </list-item>
                    <list-item>
                        <p>Unjustified Ensemble Architecture and Model Selection:&#x00a0;The rationale for the specific composition of the XRAI model is unclear and seems arbitrary. 
                            <list list-type="bullet">
                                <list-item>
                                    <p>Inclusion of a Poorly Performing Model:&#x00a0;The authors' own results (Table 1) show that the Isolation Forest model yields extremely low precision (0.0192) and an F1-score (0.0376) for the fraud class, which the authors rightly identify as "impractical" and lacking "practical usefulness." Its inclusion in the final weighted ensemble is counterintuitive and requires justification. A clear explanation is needed as to why a model known to be highly noisy and generate excessive false positives contributes positively to the final ensemble.</p>
                                </list-item>
                                <list-item>
                                    <p>Redundancy of Supervised Models:&#x00a0;The framework includes both XGBoost and Random Forest, which are methodologically similar tree-based ensemble methods. The manuscript does not explain the benefit of using both in the final model rather than selecting the single best-performing supervised algorithm. This adds unnecessary complexity without a clear, stated advantage.</p>
                                </list-item>
                            </list> </p>
                    </list-item>
                    <list-item>
                        <p>Marginal Performance Gain and Novelty of Contribution: 
                            <list list-type="bullet">
                                <list-item>
                                    <p>The manuscript claims to propose a "novel" framework. While the specific four-model combination might be new, the general concept of creating a hybrid model by combining supervised and unsupervised learning for fraud detection is well-established in the literature, as cited by the authors themselves (e.g., Carcillo et al., 2021).</p>
                                </list-item>
                                <list-item>
                                    <p>Furthermore, the performance gain of the complex XRAI model over its best individual component (XGBoost) is marginal. The F1-score for the fraud class improves from 0.9328 to 0.9407&#x2014;a gain of less than one percentage point&#x2014;while the recall remains identical. The authors should clarify the practical significance of this small improvement in light of the model's increased complexity, maintainability, and computational overhead. A simpler ensemble, perhaps combining only XGBoost and the Autoencoder, should be tested and discussed as a more parsimonious alternative.</p>
                                </list-item>
                            </list> </p>
                    </list-item>
                    <list-item>
                        <p>Insufficient Motivation for Imbalance Handling Technique:&#x00a0;The choice of BorderlineSMOTE for handling class imbalance is stated but not motivated. The authors should briefly explain why this specific technique was selected over other common methods (e.g., ADASYN, SMOTE-ENN, random over/under-sampling) and how it is particularly suited for this dataset and model architecture.</p>
                    </list-item>
                    <list-item>
                        <p>Generalizability:&#x00a0;The framework is not specific to fraud detection and is tested on a single, anonymized dataset. While this is a limitation of the study, the authors could strengthen the discussion by more clearly positioning the framework as a general anomaly detection pipeline and suggesting how it might be adapted with domain-specific features for other applications, and including performance metrics for other publicly available datasets.</p>
                    </list-item>
                </list>
            </p>
            <p>Is the work clearly and accurately presented and does it cite the current literature?</p>
            <p>Partly</p>
            <p>If applicable, is the statistical analysis and its interpretation appropriate?</p>
            <p>Yes</p>
            <p>Are all the source data underlying the results available to ensure full reproducibility?</p>
            <p>Yes</p>
            <p>Is the study design appropriate and is the work technically sound?</p>
            <p>No</p>
            <p>Are the conclusions drawn adequately supported by the results?</p>
            <p>Partly</p>
            <p>Are sufficient details of methods and analysis provided to allow replication by others?</p>
            <p>Yes</p>
            <p>Reviewer Expertise:</p>
            <p>Credit Card Fraud Detection, Time Series Forecasting, Anomaly Detection</p>
            <p>I confirm that I have read this submission and believe that I have an appropriate level of expertise to state that I do not consider it to be of an acceptable scientific standard, for reasons outlined above.</p>
        </body>
        <sub-article article-type="response" id="comment15034-398688">
            <front-stub>
                <contrib-group>
                    <contrib contrib-type="author">
                        <name>
                            <surname>Shanaa</surname>
                            <given-names>Mohammad</given-names>
                        </name>
                        <aff>Computer Science, The British University in Dubai, Dubai, Dubai, United Arab Emirates</aff>
                    </contrib>
                </contrib-group>
                <author-notes>
                    <fn fn-type="conflict">
                        <p>
                            <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                    </fn>
                </author-notes>
                <pub-date pub-type="epub">
                    <day>7</day>
                    <month>12</month>
                    <year>2025</year>
                </pub-date>
            </front-stub>
            <body>
                <p>We sincerely thank the reviewer for their detailed, insightful, and constructive feedback. We have carefully considered each point and submitted a revised manuscript to clarify the theoretical justification, methodological rationale, and generalizability of the XRAI framework. Below, we provide a point-by-point response to each major concern.</p>
                <p> </p>
                <p> 
                    <bold>1. Fundamental Flaw in Preprocessing Methodology (PCA Application)</bold>
                </p>
                <p> 
                    <bold>Reviewer Comment:</bold>
                </p>
                <p> The creditcard.csv dataset's primary features (V1&#x2013;V28) are already the result of PCA transformation. Applying PCA again to these components is conceptually flawed&#x2026; The authors must either remove this step or provide a strong theoretical justification for this unconventional approach.</p>
                <p> 
                    <bold>Author Response:</bold>
                </p>
                <p> We thank the reviewer for highlighting this important point. We acknowledge that the 
                    <italic>creditcard.csv</italic> dataset already includes PCA-derived features (V1&#x2013;V28). In our framework, PCA was used not to perform additional feature extraction, but rather as a 
                    <bold>numerical conditioning and normalization step</bold> to ensure that all sub-models within the XRAI ensemble operate on a consistent, decorrelated feature space. This approach is supported by previous studies that emphasize the role of PCA and whitening transformations in improving feature orthogonality, numerical stability, and model convergence (Jolliffe &amp; Cadima, 2016; Kessy, Lewin &amp; Strimmer, 2018).</p>
                <p> To clarify this, we have explicitly revised the manuscript to state:</p>
                <p> &#x201c;Although the 
                    <italic>creditcard.csv</italic> dataset already contains PCA-derived features, we applied an additional PCA/whitening step purely as a normalization and conditioning layer so that all XRAI sub-models operate on a consistent, decorrelated feature space; this use of PCA/whitening for orthogonalization and numerical stability is well-established in the literature (Jolliffe &amp; Cadima, 2016; Kessy, Lewin &amp; Strimmer, 2018).&#x201d;</p>
                <p> </p>
                <p> 
                    <bold>2. Unjustified Ensemble Architecture and Model Selection</bold>
                </p>
                <p> 
                    <bold>Reviewer Comment:</bold>
                </p>
                <p> The rationale for the specific composition of the XRAI model is unclear and seems arbitrary.</p>
                <p> 
                    <bold>Author Response:</bold>
                </p>
                <p> We appreciate this observation and have expanded the discussion to clarify the 
                    <bold>theoretical complementarity</bold> of the selected models. Each model contributes a distinct strength within the hybrid architecture. To clarify this, we have explicitly revised the manuscript to state:</p>
                <p> </p>
                <p> &#x201c;The XRAI framework integrates four complementary models, each contributing distinct capabilities that collectively enhance anomaly-detection performance. The supervised learners, XGBoost and Random Forest, establish strong foundational decision boundaries and improve model stability. XGBoost offers high precision and interpretability, providing calibrated scoring that anchors the ensemble&#x2019;s primary classification behavior, while Random Forest enhances robustness by reducing variance and mitigating overfitting, thereby strengthening generalization. To detect anomalies that supervised models may miss, XRAI incorporates two unsupervised detectors: the Autoencoder, which is highly sensitive to structural irregularities and identifies latent deviations within the data, and the Isolation Forest, which excels at capturing rare or extreme outliers and ensuring broad boundary-level coverage. This structure was inspired by hybrid anomaly-detection principles found in recent research (Carcillo et al., 
                    <italic>Information Sciences</italic>, 2021; Liu et al., 
                    <italic>HBEM</italic>, 2023), which demonstrate that combining precision-oriented supervised models with sensitivity-oriented unsupervised models improves recall without inflating false positives; this justification has been added to the 
                    <italic>Methods</italic> section. Together, these components form a tightly integrated ensemble that delivers a more reliable and comprehensive detection mechanism.&#x201d;</p>
                <p> </p>
                <p> 
                    <bold>3. Inclusion of a Poorly Performing Model (Isolation Forest)</bold>
                </p>
                <p> 
                    <bold>Reviewer Comment:</bold>
                </p>
                <p> The Isolation Forest yields extremely low precision and F1-score. Its inclusion in the final ensemble is counterintuitive and requires justification.</p>
                <p> 
                    <bold>Author Response:</bold>
                </p>
                <p> We agree that Isolation Forest alone performs poorly on imbalanced datasets due to excessive false positives. However, its 
                    <bold>extremely high recall (0.95)</bold> makes it valuable when used in 
                    <italic>weighted ensemble combination</italic>. The low individual precision was 
                    <bold>penalized during ensemble weighting</bold>, but its sensitivity helped ensure that rare fraud instances&#x2014;often missed by purely supervised models&#x2014;were not overlooked.</p>
                <p> This approach is consistent with ensemble learning literature emphasizing the inclusion of high-recall &#x201c;weak detectors&#x201d; to prevent false negatives in rare-event detection tasks (Debener et al., 
                    <italic>Journal of Risk and Insurance</italic>, 2023; Meduri, 
                    <italic>IJ Sci Res Arch</italic>, 2024).</p>
                <p> We have clarified this in the revised manuscript:</p>
                <p> &#x201c;In summary, the Isolation Forest algorithm is a robust method for detecting anomalies in financial datasets, particularly effective in high-dimensional spaces, with parameter tuning playing a critical role in optimizing its performance. Its computational efficiency also makes it well-suited for large datasets, and although it can be individually noisy, its high sensitivity to rare and extreme anomalies remains a valuable asset. For this reason, within the hybrid XRAI framework, the Isolation Forest was assigned a relatively low ensemble weight but retained to ensure broad boundary coverage and strengthen robustness against unseen fraud patterns. Despite its limitations, integrating Isolation Forest with complementary methods in a hybrid ensemble significantly enhances overall anomaly-detection capability.&#x201d;</p>
                <p> 
                    <bold>4. Redundancy of Supervised Models (XGBoost and Random Forest)</bold>
                </p>
                <p> 
                    <bold>Reviewer Comment:</bold>
                </p>
                <p> Including both Random Forest and XGBoost adds unnecessary complexity without clear benefit.</p>
                <p> 
                    <bold>Author Response:</bold>
                </p>
                <p> We thank the reviewer for raising this valid concern. While both models are tree-based, they exhibit complementary learning biases: 
                    <list list-type="bullet">
                        <list-item>
                            <p>
                                <bold>XGBoost</bold> reduces bias via gradient boosting, excelling in fine-grained pattern detection;</p>
                        </list-item>
                        <list-item>
                            <p>
                                <bold>Random Forest</bold> reduces variance via bagging, enhancing stability and resistance to overfitting.</p>
                        </list-item>
                    </list> Their combination thus enhances both precision and generalization &#x2014; a strategy validated in comparative ensemble studies (Murat et al., 
                    <italic>BTOUPhMath</italic>, 2023; Liu, 
                    <italic>HBEM</italic>, 2023). We have revised the &#x201c;Hybrid Integration&#x201d; subsection to explain this rationale explicitly.</p>
                <p> </p>
                <p> 
                    <bold>5. Marginal Performance Gain and Novelty of Contribution</bold>
                </p>
                <p> 
                    <bold>Reviewer Comment:</bold>
                </p>
                <p> The model&#x2019;s performance gain is marginal (&lt;1% F1 improvement) and the novelty claim is overstated.</p>
                <p> 
                    <bold>Author Response:</bold>
                </p>
                <p> We acknowledge that the numerical F1 gain appears modest; however, in 
                    <bold>highly imbalanced domains</bold>, even fractional improvements can translate into substantial real-world cost savings. For example, in large-scale financial operations, a 0.8% improvement in fraud detection precision can prevent hundreds of false alerts per million transactions, enhancing customer trust and reducing manual review overhead.</p>
                <p> Furthermore, the 
                    <bold>novelty</bold> of the study lies not solely in the combination of four algorithms but in: 
                    <list list-type="order">
                        <list-item>
                            <p>The 
                                <bold>weighted ensemble optimization mechanism</bold> that balances supervised and unsupervised outputs.</p>
                        </list-item>
                        <list-item>
                            <p>The 
                                <bold>open-source reproducibility framework</bold> (GitHub) ensuring transparency and reusability.</p>
                        </list-item>
                        <list-item>
                            <p>The focus on 
                                <bold>real-time operational applicability</bold> of hybrid detection pipelines.</p>
                        </list-item>
                    </list> These contributions align with F1000Research&#x2019;s emphasis on 
                    <italic>reproducibility and practical impact</italic>. We have revised the &#x201c;Conclusion and Future Work&#x201d; section to emphasize these aspects more clearly.</p>
                <p> </p>
                <p> 
                    <bold>6. Motivation for Using BorderlineSMOTE</bold>
                </p>
                <p> 
                    <bold>Reviewer Comment:</bold>
                </p>
                <p> The choice of BorderlineSMOTE is stated but not motivated.</p>
                <p> 
                    <bold>Author Response:</bold>
                </p>
                <p> We appreciate this important point and have added a detailed explanation.</p>
                <p> BorderlineSMOTE was selected because 
                    <bold>fraudulent transactions in credit card data typically occur near class decision boundaries</bold>, making standard SMOTE or ADASYN less effective. BorderlineSMOTE focuses on minority samples close to the borderline region, generating more realistic synthetic examples and improving model sensitivity without introducing noise.</p>
                <p> This choice follows findings by Han et al. (2005) and recent credit card fraud studies such as Noviandy et al. (2023) and Zhang et al. (2022), which demonstrated superior F1-scores using BorderlineSMOTE for imbalanced fraud data. The rationale and supporting citations have been added to the &#x201c;Data Preprocessing and Class Imbalance&#x201d; section.</p>
                <p> </p>
                <p> 
                    <bold>7. Generalizability and Broader Applications</bold>
                </p>
                <p> 
                    <bold>Reviewer Comment:</bold>
                </p>
                <p> The framework was tested on a single dataset and may not generalize well.</p>
                <p> 
                    <bold>Author Response:</bold>
                </p>
                <p> We fully agree and appreciate this suggestion. We have revised the Conclusion section to position XRAI as a 
                    <bold>general anomaly detection framework</bold> adaptable to multiple domains beyond fraud detection.</p>
                <p> We have added the following text:</p>
                <p> &#x201c;Although validated on the 
                    <italic>creditcard.csv</italic> dataset, XRAI&#x2019;s architecture is domain-agnostic and can be readily adapted to other anomaly detection contexts such as cybersecurity intrusion detection, insurance fraud, and healthcare anomaly analysis. Future work will involve evaluating the model on diverse benchmark datasets, including IEEE-CIS Fraud Detection and UNSW-NB15, to establish its cross-domain generalizability.&#x201d;</p>
                <p> </p>
                <p> </p>
                <p> &#x00b7;&#x00a0; Carcillo, F., Le Borgne, Y.-A., Caelen, O., Bontempi, G. (2021). Combining unsupervised and supervised learning in credit card fraud detection. 
                    <italic>Information Sciences</italic>, 
                    <bold>557</bold>, 317&#x2013;331. 
                    <ext-link ext-link-type="uri" xlink:href="https://www.sciencedirect.com/science/article/abs/pii/S0020025519304451?utm_source=chatgpt.com">ScienceDirect</ext-link>
                </p>
                <p> &#x00b7;&#x00a0; Han, H., Wang, W.-Y. &amp; Mao, B.-H. (2005). Borderline-SMOTE: A new over-sampling method in imbalanced data sets learning. In 
                    <italic>Advances in Intelligent Computing</italic> (ICIC 2005), LNCS 3644, 878&#x2013;887. Springer. 
                    <ext-link ext-link-type="uri" xlink:href="https://link.springer.com/chapter/10.1007/11538059_91?utm_source=chatgpt.com">SpringerLink</ext-link>
                </p>
                <p> &#x00b7;&#x00a0; Huang, Y., Wang, S., Hu, Y., et al. (2021). A robust anomaly detection algorithm based on principal component analysis. 
                    <italic>Intelligent Data Analysis</italic>, 
                    <bold>25</bold>(6), 1331&#x2013;1348. 
                    <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3233/IDA-195054">https://doi.org/10.3233/IDA-195054</ext-link>. 
                    <ext-link ext-link-type="uri" xlink:href="https://journals.sagepub.com/doi/abs/10.3233/IDA-195054?utm_source=chatgpt.com">SAGE Journals</ext-link>
                </p>
                <p> &#x00b7;&#x00a0; Jolliffe, I.T. &amp; Cadima, J. (2016). Principal component analysis: A review and recent developments. 
                    <italic>Philosophical Transactions of the Royal Society A</italic>, 
                    <bold>374</bold>(2065), 20150202. 
                    <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1098/rsta.2015.0202">https://doi.org/10.1098/rsta.2015.0202</ext-link>. 
                    <ext-link ext-link-type="uri" xlink:href="https://pmc.ncbi.nlm.nih.gov/articles/PMC4792409/?utm_source=chatgpt.com">PMC</ext-link>
                </p>
                <p> &#x00b7;&#x00a0; Kessy, A., Lewin, A. &amp; Strimmer, K. (2018). Optimal whitening and decorrelation. 
                    <italic>The American Statistician</italic>, 
                    <bold>72</bold>(4), 309&#x2013;314. 
                    <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1080/00031305.2016.1277159">https://doi.org/10.1080/00031305.2016.1277159</ext-link>. 
                    <ext-link ext-link-type="uri" xlink:href="https://ideas.repec.org/a/taf/amstat/v72y2018i4p309-314.html?utm_source=chatgpt.com">IDEAS/RePEc</ext-link>
                </p>
            </body>
        </sub-article>
    </sub-article>
</article>
