<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.2 20190208//EN" "http://jats.nlm.nih.gov/publishing/1.2/JATS-journalpublishing1.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="1.2" xml:lang="en">
    <front>
        <journal-meta>
            <journal-id journal-id-type="pmc">F1000Research</journal-id>
            <journal-title-group>
                <journal-title>F1000Research</journal-title>
            </journal-title-group>
            <issn pub-type="epub">2046-1402</issn>
            <publisher>
                <publisher-name>F1000 Research Limited</publisher-name>
                <publisher-loc>London, UK</publisher-loc>
            </publisher>
        </journal-meta>
        <article-meta>
            <article-id pub-id-type="doi">10.12688/f1000research.169456.2</article-id>
            <article-categories>
                <subj-group subj-group-type="heading">
                    <subject>Research Article</subject>
                </subj-group>
                <subj-group>
                    <subject>Articles</subject>
                </subj-group>
            </article-categories>
            <title-group>
                <article-title>A Differential Evolution-Based Optimized Ensemble for Balanced and Imbalanced Medical Datasets</article-title>
                <fn-group content-type="pub-status">
                    <fn>
                        <p>[version 2; peer review: 2 approved]</p>
                    </fn>
                </fn-group>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Das</surname>
                        <given-names>Surajit</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Data Curation</role>
                    <role content-type="http://credit.niso.org/">Formal Analysis</role>
                    <role content-type="http://credit.niso.org/">Software</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Original Draft Preparation</role>
                    <xref ref-type="aff" rid="a1">1</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Nayak</surname>
                        <given-names>Samaleswari P.</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Conceptualization</role>
                    <role content-type="http://credit.niso.org/">Project Administration</role>
                    <role content-type="http://credit.niso.org/">Supervision</role>
                    <xref ref-type="aff" rid="a2">2</xref>
                </contrib>
                <contrib contrib-type="author" corresp="yes">
                    <name>
                        <surname>Sahoo</surname>
                        <given-names>Biswajit</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Funding Acquisition</role>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Validation</role>
                    <uri content-type="orcid">https://orcid.org/0000-0003-1355-3395</uri>
                    <xref ref-type="corresp" rid="c1">a</xref>
                    <xref ref-type="aff" rid="a1">1</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Champati Rai</surname>
                        <given-names>Satyananda</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Visualization</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <uri content-type="orcid">https://orcid.org/0000-0002-4237-4591</uri>
                    <xref ref-type="aff" rid="a1">1</xref>
                </contrib>
                <aff id="a1">
                    <label>1</label>School of Computer Engineering, Kalinga Institute of Industrial Technology, Bhubaneswar, Odisha, 751024, India</aff>
                <aff id="a2">
                    <label>2</label>Department of Computer Science and Engineering, Silicon University, Bhubaneswar, Odisha, 751024, India</aff>
            </contrib-group>
            <author-notes>
                <corresp id="c1">
                    <label>a</label>
                    <email xlink:href="mailto:bsahoofcs@kiit.ac.in">bsahoofcs@kiit.ac.in</email>
                </corresp>
                <fn fn-type="conflict">
                    <p>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>27</day>
                <month>1</month>
                <year>2026</year>
            </pub-date>
            <pub-date pub-type="collection">
                <year>2025</year>
            </pub-date>
            <volume>14</volume>
            <elocation-id>1003</elocation-id>
            <history>
                <date date-type="accepted">
                    <day>22</day>
                    <month>1</month>
                    <year>2026</year>
                </date>
            </history>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2026 Das S et al.</copyright-statement>
                <copyright-year>2026</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access article distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <self-uri content-type="pdf" xlink:href="https://f1000research.com/articles/14-1003/pdf"/>
            <abstract>
                <sec>
                    <title>Background</title>
                    <p>Class imbalance is a frequent and severe problem in medical datasets, where instances from the minority class are usually high risk or disease positive. Most traditional classifiers suffer from a bias towards the majority class, resulting in a poor detection rate of the minority class and, therefore, decreased confidence in prediction systems in medical applications.</p>
                </sec>
                <sec>
                    <title>Methods</title>
                    <p>In this paper, we present an optimized ensemble by differential evolution (OEDE), a novel ensemble learning framework, to address this problem. OEDE harmonizes three dissimilar base learners (Logistic Regression, Random Forest, and XGBoost) and trains each using class-balancing techniques. Next, the model utilizes Differential Evolution (DE) to discover the most appropriate ensemble weights to maximize the area under the ROC curve (AUC) on a validation dataset.</p>
                </sec>
                <sec>
                    <title>Result</title>
                    <p>We conducted experiments on four real-world medical datasets, whose imbalance ratios vary from 1.89 to 14.6, using OEDE in the original, SMOTE, and ADASYN balanced conditions. Experimental results demonstrate substantial performance gain of OEDE on the challenging Thoracic dataset, achieving a 70.08% AUC, outperforming the standard Random Forest (50.82%) and AdaBoost (47.15%) baselines by over 19 percentage points. Additionally, on the Cervical Cancer dataset, the model achieved a peak AUC of 97.89%. The results indicate that the proposed OEDE consistently outperforms or is competitive with traditional ensemble models in terms of AUC, F1-score, and Recall. ROC curve analysis also confirmed OEDE&#x2019;s superior discriminative capabilities.</p>
                </sec>
                <sec>
                    <title>Conclusion</title>
                    <p>The proposed OEDE framework effectively improves minority class detection in imbalanced medical datasets. Its robust and flexible design makes it a promising tool for healthcare risk prediction tasks where minority class groups need to be well identified.</p>
                </sec>
            </abstract>
            <kwd-group kwd-group-type="author">
                <kwd>Ensemble Learning</kwd>
                <kwd>Differential Evolution</kwd>
                <kwd>Class Imbalance</kwd>
                <kwd>AUC Optimization</kwd>
                <kwd>SMOTE</kwd>
                <kwd>ADASYN</kwd>
            </kwd-group>
            <funding-group>
                <award-group id="fund-1" xlink:href="https://doi.org/10.13039/501100020612">
                    <funding-source>Kalinga Institute of Industrial Technology</funding-source>
                </award-group>
                <funding-statement>The author(s) declared that no grants were involved in supporting this work.</funding-statement>
            </funding-group>
        </article-meta>
        <notes>
            <sec sec-type="version-changes">
                <label>Revised</label>
                <title>Amendments from Version 1</title>
                <p>We have revised the manuscript based on constructive comments from the reviewer to make it clearer and more precise. We have made changes to the abstract: subjective descriptions have been rephrased into actual results, and small confidence intervals are reported for the performance gaps left in the Thoracic and Cervical Cancer datasets. In this sense, the research gap is redefined as a "methodological gap," now supported by new citations that show the effectiveness of traditional gradient-based algorithms at optimising non-differentiable metrics such as AUC. We have also improved the discussion of novelty in the contributions and conclusion sections, removing emphasis on components (e.g., SMOTE, or particular base learners), and instead focusing on our novel OEDE integration framework that uniquely employs&#x00a0;Differential Evolution for direct optimization of ensemble weights.</p>
            </sec>
        </notes>
    </front>
    <body>
        <sec id="sec5" sec-type="intro">
            <title>1. Introduction</title>
            <p>Machine learning has become increasingly popular in medical and healthcare services in recent years because it can be employed to analyze multidimensional datasets and detect subtle patterns that may not be detectable using traditional standard statistical methods.
                <sup>
                    <xref ref-type="bibr" rid="ref1">1</xref>,
                    <xref ref-type="bibr" rid="ref2">2</xref>
                </sup> Disease diagnosis, estimation of survival, and risks using machine learning models are becoming more common in assisting clinical decision-making. However, one of the most common and serious issues in medical datasets is class imbalance, in which one class has far fewer instances than the others.
                <sup>
                    <xref ref-type="bibr" rid="ref3">3</xref>
                </sup> As a result, traditional models may be biased in favour of the majority class, which may lead to a loss of sensitivity and misclassification of rare but significant minorities.</p>
            <p>Many real-world medical datasets, such as cancer prediction, surgical outcomes, and disease screening, suffer from moderate to severe class imbalance. Models trained on imbalanced data often achieve high overall accuracy because they only predict the majority class but may have a poor generalization to the minority class. This is a problem in healthcare, where a missed positive result can be catastrophic. This challenge is commonly addressed with traditional resampling methods such as SMOTE
                <sup>
                    <xref ref-type="bibr" rid="ref4">4</xref>
                </sup> and ADASYN
                <sup>
                    <xref ref-type="bibr" rid="ref5">5</xref>
                </sup> and ensemble methods
                <sup>
                    <xref ref-type="bibr" rid="ref6">6</xref>
                </sup> such as AdaBoost (AB), CatBoost (CB), Gradient Boost (GB), XGBoost (XGB), Random Forest (RF), Balanced Random Forest (BRF), LightGBM (LGBM), Easy Ensemble (EE), and Extra Trees (ET). While previous studies have shown increased classification capabilities, they are still constrained by static or heuristic-based ensemble integration algorithms such as simple averaging
                <sup>
                    <xref ref-type="bibr" rid="ref13">7</xref>
                </sup> or static weights,
                <sup>
                    <xref ref-type="bibr" rid="ref17">8</xref>
                </sup> that lack the flexibility to adaptively weight the base learners. Additionally, existing approaches typically rely on minimizing surrogate loss functions
                <sup>
                    <xref ref-type="bibr" rid="ref20">9</xref>
                </sup>
                <sup>,</sup>
                <sup>
                    <xref ref-type="bibr" rid="ref26">10</xref>
                </sup> rather than directly maximizing non-differentiable, clinically relevant evaluation metrics such as AUC, which is critical for robust performance evaluation on imbalanced medical datasets.</p>
            <p>For imbalance classification, there is still a gap in ensemble model design, where most models use static or heuristic weights for the base models. Hence, an adaptive ensemble model is needed that incorporates class balance into training and learns to correctly weight the outputs of the base learners. To address these goals, we proposed a novel ensemble model called Optimized Ensemble by Differential Evolution (OEDE) using an adaptive weighted ensemble, where the optimal weights are determined by Differential Evolution (DE). The main contributions of this study are as follows:
                <list list-type="bullet">
                    <list-item>
                        <label>&#x2022;</label>
                        <p>A novel ensemble architecture that employs Differential Evolution for direct maximization of a non-differentiable metric like AUC to bypass the gradient-based meta learner&#x2019;s limitations.</p>
                    </list-item>
                    <list-item>
                        <label>&#x2022;</label>
                        <p>A dynamic weight evolution strategy that adapts to dataset imbalance, prioritizing base learners that effectively capture minority classes.</p>
                    </list-item>
                    <list-item>
                        <label>&#x2022;</label>
                        <p>Empirical evidence shows the robustness of OEDE across different imbalance ratios (1.89 &#x2013; 14.6), outperforming traditional ensembles in high-imbalance scenarios.</p>
                    </list-item>
                </list>
            </p>
        </sec>
        <sec id="sec6">
            <title>2. Related work</title>
            <p>Dey and Pratap
                <sup>
                    <xref ref-type="bibr" rid="ref7">11</xref>
                </sup> studied different oversampling techniques, such as SMOTE, Borderline-SMOTE, and ADASYN, on different statistical models, such as SVM, KNN, GNB, DT, and RF, concluding that RF combined with SMOTE outperforms others. T.-C.T. Chen et al.
                <sup>
                    <xref ref-type="bibr" rid="ref8">12</xref>
                </sup> used an ensemble approach for the classification of diabetes, where DNN performed the classification, and a modified RF was used to explain the results of the proposed model. Mart&#x00ed;nez-Velasco et al.
                <sup>
                    <xref ref-type="bibr" rid="ref9">13</xref>
                </sup> have used both oversampling and under-sampling along with 8 different ML models and concluded that the balanced bagging and balanced RF (BRF) beat every other setup, even without balancing the dataset. To address this class imbalance problem, Agyemang et al.
                <sup>
                    <xref ref-type="bibr" rid="ref10">14</xref>
                </sup> used several oversampling techniques such as Random Oversampling (RO), SMOTE, SMOTE-Tomek, and ADASYN, along with ML models such as K-Nearest Neighbour (KNN), Support Vector Machine (SVM), Logistic Regression (LR), Random Forest (RF), and Decision Tree (DT), and concluded that RO-SVM gave the best result. Abayomi-Alli et al.
                <sup>
                    <xref ref-type="bibr" rid="ref11">15</xref>
                </sup> proposed a 2-phase ensemble model combining DNN with 15 other ML models (ExtraTrees, SVM, RBF, etc.) for COVID-19 classification, showing that the DNN-ExtraTrees ensemble performed better than the other combinations.</p>
            <p>Elgendy et al.
                <sup>
                    <xref ref-type="bibr" rid="ref12">16</xref>
                </sup> used a stacking-based ensemble of seven base models for diabetes prediction and concluded that the stacked multilayer perceptron (MLP) provides the highest accuracy. Dutta et al.
                <sup>
                    <xref ref-type="bibr" rid="ref13">7</xref>
                </sup> applied weighted average ensemble strategies with GNB, BNB, RF, DT, XGB, and LGB and concluded that the DT+RF+XGB+LGB combination achieves 73.5% accuracy, which is the highest among all combinations. Alzakari et al.
                <sup>
                    <xref ref-type="bibr" rid="ref14">17</xref>
                </sup> proposed a two-stage ensemble combining XGBoost and Bi-LSTM, where XGBoost performs feature selection and early classification and Bi-LSTM performs second stage classification and pattern recognition. Das et al.
                <sup>
                    <xref ref-type="bibr" rid="ref15">18</xref>
                </sup> conducted studies of different ML models in different class-imbalanced datasets and concluded that RF performs remarkably well in both balanced and imbalanced datasets. Senthilvadivu et al.
                <sup>
                    <xref ref-type="bibr" rid="ref16">19</xref>
                </sup> used RF and XGB for decision making in ICU patients and showed that XGB performs better than RF.</p>
            <p>For the prediction of heart disease, Abdellatif et al.
                <sup>
                    <xref ref-type="bibr" rid="ref17">8</xref>
                </sup> proposed a weighted random forest ensemble model, used along with an infinite feature selection strategy, and concluded that the proposed model performed better than the SMOTE-RF combination. Yalin et al.
                <sup>
                    <xref ref-type="bibr" rid="ref18">20</xref>
                </sup> proposed the XGBoost-BLR method for the classification of diabetes, where XGBoost is used to transform selected features into higher dimensions, and binary logistic regression (BLR) was used for modelling the higher dimensional data. Abnoosian et al.
                <sup>
                    <xref ref-type="bibr" rid="ref19">21</xref>
                </sup> used a normalized weighted ensemble to aggregate the results of six different ML models for the classification of diabetes, showing that the proposed ensemble model performed better than the individual base models. Liu et al.
                <sup>
                    <xref ref-type="bibr" rid="ref20">9</xref>
                </sup> used bagging to overcome the problem of an imbalanced dataset, where LR was used for feature selection, and SVM was used as a weak classifier. For heart disease prediction, Masruriyah et al.
                <sup>
                    <xref ref-type="bibr" rid="ref21">22</xref>
                </sup> used SMOTE and ADASYN along with four ML models and concluded that the oversampling techniques cause a reduction in the accuracy of the model.</p>
            <p>Pablo et al.
                <sup>
                    <xref ref-type="bibr" rid="ref22">23</xref>
                </sup> performed an analysis of different ML models and sampling techniques on the COVID-19 dataset and concluded that MLP stood out strongly in all combinations, whereas SVM gave lower performance in all combinations. To classify COVID-19, Chowdhury et al.
                <sup>
                    <xref ref-type="bibr" rid="ref23">24</xref>
                </sup> proposed a two-step ensemble pipeline with four different ML models, where the results of KNN, SVM, and XGB were passed through RF for final prediction, and concluded that the proposed model outperformed the existing models. Prithula et al.
                <sup>
                    <xref ref-type="bibr" rid="ref24">25</xref>
                </sup> proposed a stacking ensemble with ET, RF, and CB as base models and GB as a meta-learner, and concluded that CB outperforms the proposed model. In a study by Chowdhury et al.,
                <sup>
                    <xref ref-type="bibr" rid="ref25">26</xref>
                </sup> performance analysis of different ML models was carried out on the original dataset and after the dataset was balanced using oversampling, under-sampling, and hybrid-sampling techniques; it was shown that the ML models performed better on the original dataset, except that there was a minor improvement in recall after the dataset was balanced. To handle the imbalanced dataset, Mienye and Sun
                <sup>
                    <xref ref-type="bibr" rid="ref26">10</xref>
                </sup> proposed four cost-sensitive ML models by tuning the hyperparameters and concluded that cost-sensitive XGB performs better than the other models.</p>
            <p>In summary (as shown in 
                <xref ref-type="table" rid="T1">
Table 1</xref>), various strategies have been explored to address the class imbalance problem. Oversampling methods such as SMOTE and ADASYN, under-sampling techniques such as RUS and ENN, and hybrid sampling techniques such as SMOTE-ENN and SMOTE-Tomek have been widely used to improve the performance of the models. Many studies have shown that ensemble models can improve the performance by assembling multiple base models. In fact, models such as RF and XGBoost are highly effective across different healthcare applications, as shown in different studies. Overall, the literature suggests that there is no universal solution, and that the selection of the technique is typically based on the nature of the dataset.</p>
            <table-wrap id="T1" orientation="portrait" position="float">
                <label>
Table 1. </label>
                <caption>
                    <title>Literature review.</title>
                </caption>
                <table content-type="article-table" frame="hsides">
                    <thead>
                        <tr>
                            <th align="left" colspan="1" rowspan="1" valign="top">Author</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Disease</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Models</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Is the original dataset imbalanced</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Imbalance handling strategy</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Observation</th>
                        </tr>
                    </thead>
                    <tbody>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Dey and Pratap
                                <sup>
                                    <xref ref-type="bibr" rid="ref7">11</xref>
                                </sup>
                            </td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Diabetes and Breast Cancer</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">SVM, KNN, GNB, DT, RF</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Yes</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">SMOTE, Borderline-SMOTE, ADASYN</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">RF in combination with SMOTE outperforms others.</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Chen et al.
                                <sup>
                                    <xref ref-type="bibr" rid="ref8">12</xref>
                                </sup>
                            </td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Diabetes</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">DNN, RF</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Yes</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">-</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">RF is used to explain the result of DNN.</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Mart&#x00ed;nez-Velasco et al.
                                <sup>
                                    <xref ref-type="bibr" rid="ref9">13</xref>
                                </sup>
                            </td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Age-Related Macular Degeneration and Preeclampsia</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Balanced Bagging, BRF, RF, GB, KNN, LR, SVM, DT</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Yes</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">SMOTE and Under-sampling</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Balanced bagging and BRF outperform others even in imbalanced datasets.</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Agyemang et al.
                                <sup>
                                    <xref ref-type="bibr" rid="ref10">14</xref>
                                </sup>
                            </td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Stroke</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">KNN, SVM, LR, RF, DT</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Yes</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">RO, ADASYN, SMOTE, SMOTE&#x2013;Tomek</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">SVM with Random Oversampling performs better.</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Abayomi-Alli et al.
                                <sup>
                                    <xref ref-type="bibr" rid="ref11">15</xref>
                                </sup>
                            </td>
                            <td align="left" colspan="1" rowspan="1" valign="top">COVID-19</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">DNN, ExtraTrees, SVM, RBF etc.</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Yes</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">SMOTE</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">DNN-ExtraTrees ensemble outperforms others.</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Elgendy et al.
                                <sup>
                                    <xref ref-type="bibr" rid="ref12">16</xref>
                                </sup>
                            </td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Diabetes</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">LR, RF, MLP, AB, GB etc.</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Yes</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">SMOTE</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">MLP with Stacking gives the highest accuracy.</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Dutta et al.
                                <sup>
                                    <xref ref-type="bibr" rid="ref13">7</xref>
                                </sup>
                            </td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Diabetes</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">GNB, BNB, RF, DT, XGB, LGB</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Yes</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">-</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">The DT, RF, XGB, and LGB combination gives better performance than others.</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Alzakari et al.
                                <sup>
                                    <xref ref-type="bibr" rid="ref14">17</xref>
                                </sup>
                            </td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Heart Disease</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">XGB, Bi-LSTM</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Yes</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">-</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">XGB performs feature selection and early classification, and Bi-LSTM performs second-stage classification.</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Das et al.
                                <sup>
                                    <xref ref-type="bibr" rid="ref15">18</xref>
                                </sup>
                            </td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Diabetes, Cancer</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">RF, XGB, AB, CB etc.</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Yes</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">SMOTE</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">RF performs well in both balanced and imbalanced datasets.</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Senthilvadivu et al.
                                <sup>
                                    <xref ref-type="bibr" rid="ref16">19</xref>
                                </sup>
                            </td>
                            <td align="left" colspan="1" rowspan="1" valign="top">ICU Condition</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">RF, XGB</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Yes</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">-</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">XGB performs better than RF.</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Abdellatif et al.
                                <sup>
                                    <xref ref-type="bibr" rid="ref17">8</xref>
                                </sup>
                            </td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Heart Disease</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">RF</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Yes</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">SMOTE</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Proposed weighted RF performs better than SMOTE-RF.</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Yalin et al.
                                <sup>
                                    <xref ref-type="bibr" rid="ref18">20</xref>
                                </sup>
                            </td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Diabetes</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">LR, XGB</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Yes</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">-</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">The proposed ensemble outperforms other base models.</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Abnoosian et al.
                                <sup>
                                    <xref ref-type="bibr" rid="ref19">21</xref>
                                </sup>
                            </td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Diabetes</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">KNN, SVM, DT, RF, AB, GNB</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Yes</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">-</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">The proposed ensemble outperforms the base models.</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Liu et al.
                                <sup>
                                    <xref ref-type="bibr" rid="ref20">9</xref>
                                </sup>
                            </td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Cardiovascular disease</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">LR, SVM, Bagging</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Yes</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Undersampling</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">SVM is used as a weak learner for bagging.</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Masruriyah et al.
                                <sup>
                                    <xref ref-type="bibr" rid="ref21">22</xref>
                                </sup>
                            </td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Heart Disease</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">C4.5, RF, SVM, LR</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Yes</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">SMOTE, ADASYN</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Oversampling decreases the model's performance.</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Pablo et al.
                                <sup>
                                    <xref ref-type="bibr" rid="ref22">23</xref>
                                </sup>
                            </td>
                            <td align="left" colspan="1" rowspan="1" valign="top">COVID-19</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">MLP, XGB, NB, DT, SVM</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Yes</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">POS, RUS, SMOTE, ADASYN</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">MLP stood out strongly for all experimental setups.</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Chowdhury et al.
                                <sup>
                                    <xref ref-type="bibr" rid="ref23">24</xref>
                                </sup>
                            </td>
                            <td align="left" colspan="1" rowspan="1" valign="top">COVID-19</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">KNN, SVM, RF, XGB</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Yes</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">SMOTE</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">The proposed pipeline outperforms other pairs and the base models.</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Prithula et al.
                                <sup>
                                    <xref ref-type="bibr" rid="ref24">25</xref>
                                </sup>
                            </td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Respiratory diseases</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">MLP, XGB, DT, SVM, AB, CB, etc.</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Yes</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">SMOTE</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">CB performs better than the proposed model.</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Chowdhury et al.
                                <sup>
                                    <xref ref-type="bibr" rid="ref25">26</xref>
                                </sup>
                            </td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Diabetes</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">LR, RF, AB, GB, Voting</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Yes</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">ENN, SMOTE_N, SMOTE-ENN, SMOTE-Tomek</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">The performance of the models is better on the original dataset.</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Mienye and Sun
                                <sup>
                                    <xref ref-type="bibr" rid="ref26">10</xref>
                                </sup>
                            </td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Diabetes, Cancer, CKD</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">LR, DT, XGB, RF</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Yes</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">-</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">Cost-sensitive XGB performs better than others.</td>
                        </tr>
                    </tbody>
                </table>
            </table-wrap>
        </sec>
        <sec id="sec7">
            <title>3. Proposed methodology</title>
            <p>To enhance the prediction accuracy on an imbalanced dataset, we propose a novel ensemble approach called Optimized Ensemble by Differential Evolution (OEDE). An overview of the proposed methodology is shown in 
                <xref ref-type="fig" rid="f1">
Figure 1</xref>. Four different medical datasets with different imbalance ratios (1.89 to 14.6) were collected from the UCI Machine Learning Repository to assess the robustness of the model. After data preprocessing, a stratified train-test split was applied to maintain the class distribution in the training, test, and validation sets. Logistic Regression (LR), Random Forest (RF), and XGBoost (XGB) were used as the base models, and Differential Evolution (DE) was used to combine their predictive strength based on their predicted probabilities on the validation set, with the Area Under the ROC Curve (AUC) as the optimization objective. Before constructing the final model, the base learners were fine-tuned using GridSearchCV and Stratified-K-Fold cross-validation to ensure a robust model under class imbalance. The performance of OEDE was tested on the original imbalanced datasets and on the datasets balanced with SMOTE and ADASYN. A performance comparison was carried out against existing ensemble models such as AB, CB, LGBM, ET, EE, and RBF.</p>
            <fig fig-type="figure" id="f1" orientation="portrait" position="float">
                <label>
Figure 1. </label>
                <caption>
                    <title>Proposed methodology.</title>
                </caption>
                <graphic id="gr1" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/195381/4d34d116-592a-4f89-a5e6-e4b9d448aa71_figure1.gif"/>
            </fig>
            <sec id="sec8">
                <title>3.1 Datasets</title>
                <p>To evaluate the robustness and adaptability of the proposed OEDE model, we used four widely used public medical datasets from the UCI Machine Learning Repository, with different class Imbalance Ratios (IR), as shown in 
                    <xref ref-type="table" rid="T2">
Table 2</xref>. These datasets were carefully chosen to cover a broad range of real-life situations where minority class data are clinically significant. The Pima Indians Diabetes Dataset (IR = 1.89) records the onset of diabetes in female patients based on diagnostic measurements. The Haberman&#x2019;s Cancer Survival Dataset (IR = 2.78) contains records of patients who had undergone breast cancer surgery and is aimed at predicting post-operative survival. The Thoracic Surgery Dataset (IR = 7.27) is used to predict patient survival following major lung surgery based on clinical and surgical factors. Finally, the Cervical Cancer Risk Dataset (IR = 14.6) uses personal health information and screening results to assess the risk of cervical cancer. These datasets provide a range of imbalance ratios where the minority class proportion is less than 40%, offering a robust testbed for validating the effectiveness of the OEDE.</p>
                <table-wrap id="T2" orientation="portrait" position="float">
                    <label>
Table 2. </label>
                    <caption>
                        <title>Summary of datasets used.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">Dataset name</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">No. of instances</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">No. of features</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Minority class</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">
Minority class proportion (%)</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">
Imbalance Ratio (IR)</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Pima Indians Diabetes
                                    <sup>
                                        <xref ref-type="bibr" rid="ref27">27</xref>
                                    </sup>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="top">768</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">9</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Positive</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">34.9%</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">1.89</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Haberman&#x2019;s Cancer
                                    <sup>
                                        <xref ref-type="bibr" rid="ref28">28</xref>
                                    </sup>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="top">306</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">4</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Negative</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">26.5%</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">2.78</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Thoracic Surgery
                                    <sup>
                                        <xref ref-type="bibr" rid="ref29">29</xref>
                                    </sup>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="top">470</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">17</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Negative</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">12.1%</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">7.27</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Cervical Cancer Risk
                                    <sup>
                                        <xref ref-type="bibr" rid="ref30">30</xref>
                                    </sup>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="top">858</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">36</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Positive</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">6.4%</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">14.6</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
            </sec>
            <sec id="sec9">
                <title>3.2 Base models</title>
                <p>The proposed ensemble model leverages three different base learners, namely Logistic Regression (LR), Random Forest (RF), and XGBoost (XGB), with the aim of improving predictive performance based on model diversity. The LR is a linear model that predicts the probability of a binary outcome using a sigmoid function.
                    <sup>
                        <xref ref-type="bibr" rid="ref31">31</xref>
                    </sup> Its interpretability and probabilistic output make it a valuable baseline, particularly when modelling linear relationships.
                    <sup>
                        <xref ref-type="bibr" rid="ref32">32</xref>,
                        <xref ref-type="bibr" rid="ref33">33</xref>
                    </sup> RF is an ensemble of decision trees that introduces non-linearity and robustness by aggregating predictions from a group of trees trained on bootstrapped datasets and random feature subsets, thus reducing variance and overfitting.
                    <sup>
                        <xref ref-type="bibr" rid="ref34">34</xref>
                    </sup> It performs well in capturing complex feature interactions.
                    <sup>
                        <xref ref-type="bibr" rid="ref35">35</xref>
                    </sup> XGBoost builds a sequence of trees, where each tree corrects the errors made by its predecessors, optimizing a regularized objective function 
                    <italic toggle="yes">L =</italic>

                    <inline-formula>

                        <mml:math display="inline">
                            <mml:msub>
                                <mml:mo>&#x2211;</mml:mo>
                                <mml:mi>i</mml:mi>
                            </mml:msub>
                            <mml:mi>l</mml:mi>
                            <mml:mrow>
                                <mml:mo stretchy="true">(</mml:mo>
                                <mml:msub>
                                    <mml:mi>y</mml:mi>
                                    <mml:mi>i</mml:mi>
                                </mml:msub>
                                <mml:mo>,</mml:mo>
                                <mml:mover accent="true">
                                    <mml:msub>
                                        <mml:mi>y</mml:mi>
                                        <mml:mi>i</mml:mi>
                                    </mml:msub>
                                    <mml:mo stretchy="true">&#x0302;</mml:mo>
                                </mml:mover>
                                <mml:mo stretchy="true">)</mml:mo>
                            </mml:mrow>
                            <mml:mo>+</mml:mo>
                            <mml:msub>
                                <mml:mo>&#x2211;</mml:mo>
                                <mml:mi>k</mml:mi>
                            </mml:msub>
                            <mml:mi mathvariant="normal">&#x03a9;</mml:mi>
                            <mml:mrow>
                                <mml:mo stretchy="true">(</mml:mo>
                                <mml:msub>
                                    <mml:mi>f</mml:mi>
                                    <mml:mi>k</mml:mi>
                                </mml:msub>
                                <mml:mo stretchy="true">)</mml:mo>
                            </mml:mrow>
                        </mml:math>
</inline-formula>

                    <italic toggle="yes">,</italic> where 
                    <inline-formula>

                        <mml:math display="inline">
                            <mml:mi mathvariant="normal">&#x03a9;</mml:mi>
                            <mml:mrow>
                                <mml:mo stretchy="true">(</mml:mo>
                                <mml:mi>f</mml:mi>
                                <mml:mo stretchy="true">)</mml:mo>
                            </mml:mrow>
                            <mml:mo>=</mml:mo>
                            <mml:mi mathvariant="italic">&#x03b3;T</mml:mi>
                            <mml:mo>+</mml:mo>
                            <mml:mfrac>
                                <mml:mn>1</mml:mn>
                                <mml:mn>2</mml:mn>
                            </mml:mfrac>
                            <mml:mi>&#x03bb;</mml:mi>
                            <mml:msup>
                                <mml:mrow>
                                    <mml:mo stretchy="true">&#x2016;</mml:mo>
                                    <mml:mi>w</mml:mi>
                                    <mml:mo stretchy="true">&#x2016;</mml:mo>
                                </mml:mrow>
                                <mml:mn>2</mml:mn>
                            </mml:msup>
                            <mml:mo>.</mml:mo>
                        </mml:math>
</inline-formula>
                    <sup>
                        <xref ref-type="bibr" rid="ref36">36</xref>
                    </sup> Its ability to model non-linearities with good precision,
                    <sup>
                        <xref ref-type="bibr" rid="ref37">37</xref>
                    </sup> regularization, and the ability to handle missing values
                    <sup>
                        <xref ref-type="bibr" rid="ref18">20</xref>
                    </sup> makes it a strong learner in the ensemble. Together, these models provide distinct perspectives, such as linear separability, variance minimization, and gradient-based optimization, making the ensemble more robust and less prone to overfitting than any single learner.</p>
            </sec>
            <sec id="sec10">
                <title>3.3 Sampling techniques</title>
                <p>We used two oversampling techniques, the Synthetic Minority Oversampling Technique (SMOTE) and Adaptive Synthetic Sampling (ADASYN), to address the class imbalance of the original dataset. We assessed the performance of our proposed ensemble model on three versions of each dataset: the original imbalanced dataset, the SMOTE-balanced dataset, and the ADASYN-balanced dataset. SMOTE creates synthetic instances for the minority class through interpolation between minority samples and their k nearest minority neighbours, thereby expanding the decision boundary and reducing overfitting to specific samples.
                    <sup>
                        <xref ref-type="bibr" rid="ref39">38</xref>
                    </sup> While SMOTE assigns the same significance to all instances of the minority class,
                    <sup>
                        <xref ref-type="bibr" rid="ref40">39</xref>
                    </sup> ADASYN focuses on varying the importance of individual minority instances according to their level of difficulty in learning.
                    <sup>
                        <xref ref-type="bibr" rid="ref41">40</xref>
                    </sup> It generates more synthetic samples around minority instances that are harder to classify owing to their lower density in the feature space, thereby promoting generalization on the challenging parts of the data.
                    <sup>
                        <xref ref-type="bibr" rid="ref42">41</xref>
                    </sup> Through this cross-dataset comparison of model performance, we aim to evaluate not only the generalizability of the proposed ensemble, but also how different balancing strategies influence its performance.</p>
            </sec>
            <sec id="sec11">
                <title>3.4 OEDE</title>
                <p>The Optimized Ensemble by Differential Evolution (OEDE) was designed as a novel ensemble model to alleviate the classification difficulties of imbalanced medical datasets. This method incorporates class-balanced base learners and a Differential Evolution (DE)-driven algorithm for AUC maximization. Three fundamentally different classifiers, LR, RF, and XGB, were used as base models. All base models were configured with a class-weighting mechanism during training to address the effects of class imbalance. LR employs class_weight=&#x2018;balanced&#x2019;, RF uses class_weight=&#x2018;balanced_subsample&#x2019;, while XGB is fine-tuned for logloss with consideration taken for label imbalance. These learners were independently trained on the same dataset and produced probability estimates for the positive class that were combined using a learned ensemble set of weights. OEDE does not combine the outputs of the base learners using fixed or heuristic-based weights; instead, it uses Differential Evolution (DE) for adaptive weight optimization. DE is a stochastic population-based global optimization method that is well-suited for non-differentiable and non-convex functions.
                    <sup>
                        <xref ref-type="bibr" rid="ref43">42</xref>&#x2013;
                        <xref ref-type="bibr" rid="ref46">45</xref>
                    </sup>
                </p>
                <p>Let the prediction probabilities from M base classifiers for a given instance 
                    <italic toggle="yes">x</italic> be denoted by [p
                    <sub>1</sub>(x), p
                    <sub>2</sub>(x), &#x2026;, p
                    <sub>M</sub>(x)]. For the proposed ensemble, assign a weight w
                    <sub>i</sub> to each base classifier 
                    <italic toggle="yes">i</italic> such that:
                    <disp-formula id="e1">

                        <mml:math display="block">
                            <mml:munderover>
                                <mml:mo>&#x2211;</mml:mo>
                                <mml:mrow>
                                    <mml:mi>i</mml:mi>
                                    <mml:mo>=</mml:mo>
                                    <mml:mn>1</mml:mn>
                                </mml:mrow>
                                <mml:mi>M</mml:mi>
                            </mml:munderover>
                            <mml:msub>
                                <mml:mi>w</mml:mi>
                                <mml:mi>i</mml:mi>
                            </mml:msub>
                            <mml:mo>=</mml:mo>
                            <mml:mn>1</mml:mn>
                            <mml:mspace width="0.25em"/>
                            <mml:mtext>and</mml:mtext>
                            <mml:mspace width="0.25em"/>
                            <mml:msub>
                                <mml:mi>w</mml:mi>
                                <mml:mi>i</mml:mi>
                            </mml:msub>
                            <mml:mo>&#x2265;</mml:mo>
                            <mml:mn>0</mml:mn>
                            <mml:mspace width="0.25em"/>
                            <mml:mtext>for all</mml:mtext>
                            <mml:mspace width="0.25em"/>
                            <mml:mi>i</mml:mi>
                        </mml:math>

                        <label>(1)</label>
</disp-formula>
                </p>
                <p>The ensemble&#x2019;s predicted probability is given by the weighted average:
                    <disp-formula id="e2">

                        <mml:math display="block">
                            <mml:msub>
                                <mml:mi>p</mml:mi>
                                <mml:mi mathvariant="italic">ens</mml:mi>
                            </mml:msub>
                            <mml:mrow>
                                <mml:mo stretchy="true">(</mml:mo>
                                <mml:mi>x</mml:mi>
                                <mml:mo stretchy="true">)</mml:mo>
                            </mml:mrow>
                            <mml:mo>=</mml:mo>
                            <mml:munderover>
                                <mml:mo>&#x2211;</mml:mo>
                                <mml:mrow>
                                    <mml:mi>i</mml:mi>
                                    <mml:mo>=</mml:mo>
                                    <mml:mn>1</mml:mn>
                                </mml:mrow>
                                <mml:mi>M</mml:mi>
                            </mml:munderover>
                            <mml:msub>
                                <mml:mi>w</mml:mi>
                                <mml:mi>i</mml:mi>
                            </mml:msub>
                            <mml:msub>
                                <mml:mi>p</mml:mi>
                                <mml:mi>i</mml:mi>
                            </mml:msub>
                            <mml:mrow>
                                <mml:mo stretchy="true">(</mml:mo>
                                <mml:mi>x</mml:mi>
                                <mml:mo stretchy="true">)</mml:mo>
                            </mml:mrow>
                        </mml:math>

                        <label>(2)</label>
</disp-formula>
                </p>
                <p>The goal was to choose the weights w = [w
                    <sub>1</sub>, w
                    <sub>2</sub>, &#x2026;, w
                    <sub>M</sub>] to maximize the AUC on a validation set. Recall that the AUC represents the probability that a randomly chosen positive instance has a higher score than a randomly chosen negative instance.
                    <disp-formula id="e3">

                        <mml:math display="block">
                            <mml:mi mathvariant="italic">AUC</mml:mi>
                            <mml:mo>=</mml:mo>
                            <mml:mfrac>
                                <mml:mn>1</mml:mn>
                                <mml:mrow>
                                    <mml:mo>|</mml:mo>
                                    <mml:msup>
                                        <mml:mi>S</mml:mi>
                                        <mml:mo>+</mml:mo>
                                    </mml:msup>
                                    <mml:mo stretchy="true">&#x2016;</mml:mo>
                                    <mml:msup>
                                        <mml:mi>S</mml:mi>
                                        <mml:mo>&#x2212;</mml:mo>
                                    </mml:msup>
                                    <mml:mo>|</mml:mo>
                                </mml:mrow>
                            </mml:mfrac>
                            <mml:munder>
                                <mml:mo>&#x2211;</mml:mo>
                                <mml:mrow>
                                    <mml:mi>i</mml:mi>
                                    <mml:mo>&#x2208;</mml:mo>
                                    <mml:msup>
                                        <mml:mi>S</mml:mi>
                                        <mml:mo>+</mml:mo>
                                    </mml:msup>
                                </mml:mrow>
                            </mml:munder>
                            <mml:munder>
                                <mml:mo>&#x2211;</mml:mo>
                                <mml:mrow>
                                    <mml:mi>j</mml:mi>
                                    <mml:mo>&#x2208;</mml:mo>
                                    <mml:msup>
                                        <mml:mi>S</mml:mi>
                                        <mml:mo>&#x2212;</mml:mo>
                                    </mml:msup>
                                </mml:mrow>
                            </mml:munder>
                            <mml:mi>I</mml:mi>
                            <mml:mrow>
                                <mml:mo stretchy="true">(</mml:mo>
                                <mml:msub>
                                    <mml:mi>p</mml:mi>
                                    <mml:mi mathvariant="italic">ens</mml:mi>
                                </mml:msub>
                                <mml:mrow>
                                    <mml:mo stretchy="true">(</mml:mo>
                                    <mml:mi>i</mml:mi>
                                    <mml:mo stretchy="true">)</mml:mo>
                                </mml:mrow>
                                <mml:mo>&gt;</mml:mo>
                                <mml:msub>
                                    <mml:mi>p</mml:mi>
                                    <mml:mi mathvariant="italic">ens</mml:mi>
                                </mml:msub>
                                <mml:mrow>
                                    <mml:mo stretchy="true">(</mml:mo>
                                    <mml:mi>j</mml:mi>
                                    <mml:mo stretchy="true">)</mml:mo>
                                </mml:mrow>
                                <mml:mo stretchy="true">)</mml:mo>
                            </mml:mrow>
                        </mml:math>

                        <label>(3)</label>
</disp-formula>where 
                    <italic toggle="yes">S
                        <sup>+</sup>:</italic> index set of the positive instances, 
                    <italic toggle="yes">S
                        <sup>&#x2212;</sup>:</italic> index set of the negative instances, 
                    <italic toggle="yes">I:</italic> indicator function (1 if its argument is true, 0 otherwise).</p>
                <p>Because many optimization algorithms (such as Differential Evolution) are formulated as minimization problems, the loss function is defined as the negative AUC:
                    <disp-formula id="e4">

                        <mml:math display="block">
                            <mml:mi>L</mml:mi>
                            <mml:mrow>
                                <mml:mo stretchy="true">(</mml:mo>
                                <mml:mi>w</mml:mi>
                                <mml:mo stretchy="true">)</mml:mo>
                            </mml:mrow>
                            <mml:mo>=</mml:mo>
                            <mml:mo>&#x2212;</mml:mo>
                            <mml:mi mathvariant="italic">AUC</mml:mi>
                            <mml:mrow>
                                <mml:mo stretchy="true">(</mml:mo>
                                <mml:msub>
                                    <mml:mi>y</mml:mi>
                                    <mml:mi mathvariant="italic">val</mml:mi>
                                </mml:msub>
                                <mml:mo>,</mml:mo>
                                <mml:msub>
                                    <mml:mi>p</mml:mi>
                                    <mml:mi mathvariant="italic">ens</mml:mi>
                                </mml:msub>
                                <mml:mrow>
                                    <mml:mo stretchy="true">(</mml:mo>
                                    <mml:mi mathvariant="italic">val</mml:mi>
                                    <mml:mo stretchy="true">)</mml:mo>
                                </mml:mrow>
                                <mml:mo stretchy="true">)</mml:mo>
                            </mml:mrow>
                        </mml:math>

                        <label>(4)</label>
</disp-formula>
                </p>
                <p>Thus, the optimization problem becomes:
                    <disp-formula id="e5">

                        <mml:math display="block">
                            <mml:munder>
                                <mml:mo>min</mml:mo>
                                <mml:mrow>
                                    <mml:mi>w</mml:mi>
                                    <mml:mspace width="0.25em"/>
                                </mml:mrow>
                            </mml:munder>
                            <mml:mi>L</mml:mi>
                            <mml:mrow>
                                <mml:mo stretchy="true">(</mml:mo>
                                <mml:mi>w</mml:mi>
                                <mml:mo stretchy="true">)</mml:mo>
                            </mml:mrow>
                            <mml:mspace width="0.25em"/>
                            <mml:mtext mathvariant="italic">subject to</mml:mtext>
                            <mml:mspace width="0.25em"/>
                            <mml:munderover>
                                <mml:mo>&#x2211;</mml:mo>
                                <mml:mrow>
                                    <mml:mi>i</mml:mi>
                                    <mml:mo>=</mml:mo>
                                    <mml:mn>1</mml:mn>
                                </mml:mrow>
                                <mml:mi>M</mml:mi>
                            </mml:munderover>
                            <mml:msub>
                                <mml:mi>w</mml:mi>
                                <mml:mi>i</mml:mi>
                            </mml:msub>
                            <mml:mo>=</mml:mo>
                            <mml:mn>1</mml:mn>
                            <mml:mo>,</mml:mo>
                            <mml:msub>
                                <mml:mi>w</mml:mi>
                                <mml:mi>i</mml:mi>
                            </mml:msub>
                            <mml:mo>&#x2265;</mml:mo>
                            <mml:mn>0</mml:mn>
                            <mml:mo>&#x2200;</mml:mo>
                            <mml:mi>i</mml:mi>
                        </mml:math>

                        <label>(5)</label>
</disp-formula>
                </p>
                <p>DE is selected for this task because of its robustness in optimizing non-differentiable and non-convex functions such as AUC. DE evolves a population of weight vectors over generations using mutation, crossover, and selection to converge to a globally optimal solution. The proposed process is described by 
                    <xref ref-type="boxed-text" rid="B1">Algorithm 1</xref>.</p>
                <boxed-text id="B1" orientation="portrait" position="float">
                    <label>Algorithm 1. </label>
                    <caption>
                        <title>Optimized Ensemble by Differential Evolution (OEDE).</title>
                    </caption>
                    <p>

                        <bold>Input:</bold>

                        <list list-type="bullet">
                            <list-item>
                                <label>-</label>
                                <p>Training Set: X_train, y_train</p>
                            </list-item>
                            <list-item>
                                <label>-</label>
                                <p>Validation Set: X_val, y_val</p>
                            </list-item>
                            <list-item>
                                <label>-</label>
                                <p>Test Set: X_test, y_test</p>
                            </list-item>
                            <list-item>
                                <label>-</label>
                                <p>DE parameters: population size P, generations G</p>
                            </list-item>
                        </list>
</p>
                    <p>

                        <bold>Output:</bold>

                        <list list-type="bullet">
                            <list-item>
                                <label>-</label>
                                <p>Test set predictions y_pred</p>
                            </list-item>
                            <list-item>
                                <label>-</label>
                                <p>Evaluation metrics: Accuracy, Precision, Recall, F1-score, AUC</p>
                            </list-item>
                        </list>
                    </p>
                    <p>1. Train LR, RF, XGB on (X_train, y_train)</p>
                    <p>2. For each model m &#x2208; {LR, RF, XGB}:</p>
                    <p>3.&#x2003;p_val_m = m.predict_proba(X_val)[:, 1]</p>
                    <p>4. P_val = [p_val_LR, p_val_RF, p_val_XGB]</p>
                    <p>5. function AUC_Loss(weights, P_val, y_val):</p>
                    <p>6.&#x2003;Normalize weights: w = weights/sum (weights)</p>
                    <p>7.&#x2003;Ensemble prediction: p_ens = dot(P_val, w)</p>
                    <p>8.&#x2003;Return -AUC(y_val, p_ens)</p>
                    <p>9. bounds = [(0, 1), (0, 1), (0, 1)]</p>
                    <p>10. w_opt = DifferentialEvolution (AUC_Loss, bounds, args=(P_val, y_val))</p>
                    <p>11. Normalize w_opt: w_opt = w_opt/sum(w_opt)</p>
                    <p>12. For each m &#x2208; {LR, RF, XGB}:</p>
                    <p>13.&#x2003;p_test_m = m.predict_proba(X_test)[:, 1]</p>
                    <p>14. P_test = [p_test_LR, p_test_RF, p_test_XGB]</p>
                    <p>15. p_test_ens = dot(P_test, w_opt)</p>
                    <p>16. y_pred = 1 if p_test_ens &#x2265; 0.5 else 0</p>
                    <p>17. Return y_pred, evaluation metrics</p>
                </boxed-text>
            </sec>
            <sec id="sec12">
                <title>3.5 Performance metrics</title>
                <p>We used five common yet important classification metrics, namely Accuracy, Precision, Recall, F1-Score, and Area Under the ROC Curve (AUC), to evaluate the performance of the proposed OEDE model. When working with imbalanced datasets, these measures offer comprehensive insight into the efficiency of the model. Let P
                    <sub>C</sub>, N
                    <sub>C</sub>, P
                    <sub>E</sub>, and N
                    <sub>E</sub> represent the numbers of correctly classified positives, correctly classified negatives, false positives, and false negatives, respectively.
                    <list list-type="bullet">
                        <list-item>
                            <label>&#x2022;</label>
                            <p>The model&#x2019;s overall correctness is gauged by accuracy.</p>
                        </list-item>
                    </list>

                    <disp-formula id="e6">

                        <mml:math display="block">
                            <mml:mtext mathvariant="italic">Accuracy</mml:mtext>
                            <mml:mo>=</mml:mo>
                            <mml:mfrac>
                                <mml:mrow>
                                    <mml:msub>
                                        <mml:mi>P</mml:mi>
                                        <mml:mi>C</mml:mi>
                                    </mml:msub>
                                    <mml:mo>+</mml:mo>
                                    <mml:msub>
                                        <mml:mi>N</mml:mi>
                                        <mml:mi>C</mml:mi>
                                    </mml:msub>
                                </mml:mrow>
                                <mml:mrow>
                                    <mml:msub>
                                        <mml:mi>P</mml:mi>
                                        <mml:mi>C</mml:mi>
                                    </mml:msub>
                                    <mml:mo>+</mml:mo>
                                    <mml:msub>
                                        <mml:mi>N</mml:mi>
                                        <mml:mi>C</mml:mi>
                                    </mml:msub>
                                    <mml:mo>+</mml:mo>
                                    <mml:msub>
                                        <mml:mi>P</mml:mi>
                                        <mml:mi>E</mml:mi>
                                    </mml:msub>
                                    <mml:mo>+</mml:mo>
                                    <mml:msub>
                                        <mml:mi>N</mml:mi>
                                        <mml:mi>E</mml:mi>
                                    </mml:msub>
                                </mml:mrow>
                            </mml:mfrac>
                        </mml:math>

                        <label>(6)</label>
</disp-formula>

                    <list list-type="bullet">
                        <list-item>
                            <label>&#x2022;</label>
                            <p>The percentage of correct positive predictions among all positive predictions is known as the precision.</p>
                        </list-item>
                    </list>

                    <disp-formula id="e7">

                        <mml:math display="block">
                            <mml:mtext mathvariant="italic">Precision</mml:mtext>
                            <mml:mo>=</mml:mo>
                            <mml:mfrac>
                                <mml:msub>
                                    <mml:mi>P</mml:mi>
                                    <mml:mi>C</mml:mi>
                                </mml:msub>
                                <mml:mrow>
                                    <mml:msub>
                                        <mml:mi>P</mml:mi>
                                        <mml:mi>C</mml:mi>
                                    </mml:msub>
                                    <mml:mo>+</mml:mo>
                                    <mml:msub>
                                        <mml:mi>P</mml:mi>
                                        <mml:mi>E</mml:mi>
                                    </mml:msub>
                                </mml:mrow>
                            </mml:mfrac>
                        </mml:math>

                        <label>(7)</label>
</disp-formula>

                    <list list-type="bullet">
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Recall shows how well the model recognizes the actual positives.</p>
                        </list-item>
                    </list>

                    <disp-formula id="e8">

                        <mml:math display="block">
                            <mml:mtext mathvariant="italic">Recall</mml:mtext>
                            <mml:mo>=</mml:mo>
                            <mml:mfrac>
                                <mml:msub>
                                    <mml:mi>P</mml:mi>
                                    <mml:mi>C</mml:mi>
                                </mml:msub>
                                <mml:mrow>
                                    <mml:msub>
                                        <mml:mi>P</mml:mi>
                                        <mml:mi>C</mml:mi>
                                    </mml:msub>
                                    <mml:mo>+</mml:mo>
                                    <mml:msub>
                                        <mml:mi>N</mml:mi>
                                        <mml:mi>E</mml:mi>
                                    </mml:msub>
                                </mml:mrow>
                            </mml:mfrac>
                        </mml:math>

                        <label>(8)</label>
</disp-formula>

                    <list list-type="bullet">
                        <list-item>
                            <label>&#x2022;</label>
                            <p>F1-Score balances the Precision and Recall.</p>
                        </list-item>
                    </list>

                    <disp-formula id="e9">

                        <mml:math display="block">
                            <mml:mi>F</mml:mi>
                            <mml:mn>1</mml:mn>
                            <mml:mo>=</mml:mo>
                            <mml:mn>2</mml:mn>
                            <mml:mo>&#x00d7;</mml:mo>
                            <mml:mfrac>
                                <mml:mrow>
                                    <mml:mtext mathvariant="italic">Precision</mml:mtext>
                                    <mml:mo>&#x00d7;</mml:mo>
                                    <mml:mtext mathvariant="italic">Recall</mml:mtext>
                                </mml:mrow>
                                <mml:mrow>
                                    <mml:mtext mathvariant="italic">Precision</mml:mtext>
                                    <mml:mo>+</mml:mo>
                                    <mml:mtext mathvariant="italic">Recall</mml:mtext>
                                </mml:mrow>
                            </mml:mfrac>
                        </mml:math>

                        <label>(9)</label>
</disp-formula>
                </p>
                <p>
For imbalanced datasets, the AUC is especially significant; it represents the area under the ROC curve, which is obtained by plotting the true positive rate 
                    <inline-formula>

                        <mml:math display="inline">
                            <mml:mrow>
                                <mml:mo stretchy="true">(</mml:mo>
                                <mml:mfrac>
                                    <mml:msub>
                                        <mml:mi>P</mml:mi>
                                        <mml:mi>C</mml:mi>
                                    </mml:msub>
                                    <mml:mrow>
                                        <mml:msub>
                                            <mml:mi>P</mml:mi>
                                            <mml:mi>C</mml:mi>
                                        </mml:msub>
                                        <mml:mo>+</mml:mo>
                                        <mml:msub>
                                            <mml:mi>N</mml:mi>
                                            <mml:mi>E</mml:mi>
                                        </mml:msub>
                                    </mml:mrow>
                                </mml:mfrac>
                                <mml:mo stretchy="true">)</mml:mo>
                            </mml:mrow>
                        </mml:math>
</inline-formula> against the false positive rate 
                    <inline-formula>

                        <mml:math display="inline">
                            <mml:mrow>
                                <mml:mo stretchy="true">(</mml:mo>
                                <mml:mfrac>
                                    <mml:msub>
                                        <mml:mi>P</mml:mi>
                                        <mml:mi>E</mml:mi>
                                    </mml:msub>
                                    <mml:mrow>
                                        <mml:msub>
                                            <mml:mi>P</mml:mi>
                                            <mml:mi>E</mml:mi>
                                        </mml:msub>
                                        <mml:mo>+</mml:mo>
                                        <mml:msub>
                                            <mml:mi>N</mml:mi>
                                            <mml:mi>C</mml:mi>
                                        </mml:msub>
                                    </mml:mrow>
                                </mml:mfrac>
                                <mml:mo stretchy="true">)</mml:mo>
                            </mml:mrow>
                        </mml:math>
</inline-formula> across different classification thresholds. A higher AUC represents better separability. Finally, we visualized the ROC curve of each model, which offered an intuitive view of the trade-off between false alarms and sensitivity. The closer the curve is to the upper-left corner, the better the classifier.</p>
            </sec>
        </sec>
        <sec id="sec13" sec-type="results|discussion">
            <title>4. Results and discussion</title>
            <p>The experiment involved four different medical datasets with different imbalance ratios (1.89 &#x2013; 14.6) used for the performance analysis of the proposed model. This section provides a detailed analysis of the proposed model and compares its performance metrics with those of the existing base models after training them under three different conditions: the original imbalanced dataset, data balanced using SMOTE, and data balanced using ADASYN. The performance metrics were evaluated on a previously unseen, imbalanced test split.</p>
            <sec id="sec14">
                <title>4.1 Pima Indians diabetes dataset</title>
                <p>On the original dataset, OEDE achieved high accuracy and AUC score, as shown in 
                    <xref ref-type="table" rid="T3">
Table 3</xref>, effectively discriminating between diabetic and non-diabetic samples without artificial rebalancing. Although ensemble models such as RF and XGBoost demonstrated competitive accuracy, their AUC and recall values were low, indicating a bias towards the majority classes. The OEDE tended to have a high F1 score, while maintaining a better balance between precision and recall, as shown in 
                    <xref ref-type="fig" rid="f2">
Figure 2</xref>. The overall performance of all models improved when the dataset was balanced using SMOTE, as indicated by the increased recall and F1-score, but OEDE still outperformed the others in terms of accuracy, AUC, and F1-score. Notably, the change in the AUC value was marginal, indicating that merely balancing the data may not be sufficient. In the ADASYN-balanced scenario, the results tend to echo those of SMOTE but with minor instability for some models because ADASYN introduces noise in the minority samples. OEDE again outperformed other baseline models with a minor drop in precision while maintaining the AUC and showed a balanced performance throughout the datasets.</p>
                <table-wrap id="T3" orientation="portrait" position="float">
                    <label>
Table 3. </label>
                    <caption>
                        <title>AUC and Accuracy comparison of OEDE and state-of-the-art ML models on Pima Indians diabetes dataset.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="2" valign="top">Models</th>
                                <th align="left" colspan="2" rowspan="1" valign="top">Original imbalanced data</th>
                                <th align="left" colspan="2" rowspan="1" valign="top">SMOTE-Balanced data</th>
                                <th align="left" colspan="2" rowspan="1" valign="top">ADASYN-Balanced data</th>
                            </tr>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">Accuracy</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">AUC</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Accuracy</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">AUC</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Accuracy</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">
AUC</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">AB</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">74.46</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">80.1</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">77.06</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">82.4</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">74.46</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">80.44</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">BRF</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">76.19</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">83.9</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">76.19</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">83.49</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">76.62</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">84.17</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">ET</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">75.76</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">82.4</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">73.59</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">81.79</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">74.03</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">81.5</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">GB</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">74.89</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">82.77</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">75.32</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">84.26</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">77.92</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">82.94</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">LGBM</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">74.03</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">81.52</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">76.19</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">80.98</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">75.76</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">81.81</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">RF</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">76.19</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">83.15</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">75.76</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">85.35</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">75.76</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">84.16</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">XGB</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">76.19</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">81.33</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">76.19</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">80.89</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">77.06</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">80.87</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">CB</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">77.06</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">83.82</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">77.06</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">83.93</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">77.06</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">82.45</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">EE</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">76.19</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">79.81</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">77.06</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">82.4</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">75.32</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">80.05</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>OEDE</bold>
</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>77.49</bold>
</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>84.02</bold>
</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>77.92</bold>
</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>84.31</bold>
</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>77.92</bold>
</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>84.37</bold>
</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
                <fig fig-type="figure" id="f2" orientation="portrait" position="float">
                    <label>
Figure 2. </label>
                    <caption>
                        <title>Precision, Recall, and F1-score comparison between OEDE and state-of-the-art ML models on the Pima Indians Diabetes Dataset.</title>
                    </caption>
                    <graphic id="gr2" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/195381/4d34d116-592a-4f89-a5e6-e4b9d448aa71_figure2.gif"/>
                </fig>
            </sec>
            <sec id="sec15">
                <title>4.2 Haberman&#x2019;s cancer dataset</title>
                <p>OEDE achieved a higher accuracy, AUC, and F1-score than traditional ensemble models such as RF, AB, and CB in an imbalanced dataset, as shown in 
                    <xref ref-type="table" rid="T4">
Table 4</xref>. While some models show comparable accuracy, OEDE shows stability in precision and recall, leading to a better F1-score as shown in 
                    <xref ref-type="fig" rid="f3">
Figure 3</xref>, demonstrating its ability to detect the minority class without compromising the overall correctness. After the dataset is balanced with SMOTE, an improvement is observed in recall for most of the models, which shows that the models benefit from the synthetic samples. OEDE maintained its AUC lead, highlighting its ability to leverage informative patterns more effectively, even in a balanced dataset. Its high F1-score reflects good robustness against overfitting to synthetic data. When ADASYN is used for data balancing, some models show instability in precision and recall, as ADASYN tends to produce harder-to-learn synthetic samples, while OEDE maintains a high performance score without sacrificing stability and sensitivity.</p>
                <table-wrap id="T4" orientation="portrait" position="float">
                    <label>
Table 4. </label>
                    <caption>
                        <title>AUC and Accuracy comparison of OEDE and state-of-the-art ML models on Haberman&#x2019;s Cancer Dataset.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="2" valign="top">Models</th>
                                <th align="left" colspan="2" rowspan="1" valign="top">Original imbalanced data</th>
                                <th align="left" colspan="2" rowspan="1" valign="top">SMOTE-Balanced data</th>
                                <th align="left" colspan="2" rowspan="1" valign="top">ADASYN-Balanced data</th>
                            </tr>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">Accuracy</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">AUC</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Accuracy</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">AUC</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Accuracy</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">
AUC</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">AB</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">70.65</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">61.04</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">70.65</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">66.75</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">71.74</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">67.74</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">BRF</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">68.48</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">62.88</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">67.39</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">61.68</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">64.13</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">61.8</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">ET</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">70.65</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">60.61</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">68.48</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">64.22</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">66.3</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">64.66</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">GB</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">71.74</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">68.76</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">68.48</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">67.45</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">69.57</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">67.4</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">LGBM</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">65.22</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">65.33</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">66.3</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">61.77</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">65.22</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">62.47</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">RF</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">69.57</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">66.03</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">68.48</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">66.08</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">65.22</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">66.9</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">XGB</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">71.74</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">67.77</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">66.3</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">66.61</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">65.22</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">66.14</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">CB</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">67.39</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">70.8</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">71.74</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">66.96</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">71.74</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">67.74</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">EE</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">65.22</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">68.12</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">70.65</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">66.75</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">68.48</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">66.78</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>OEDE</bold>
</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>72.83</bold>
</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>73.25</bold>
</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>68.48</bold>
</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>70.37</bold>
</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>69.57</bold>
</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>66.33</bold>
</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
                <fig fig-type="figure" id="f3" orientation="portrait" position="float">
                    <label>
Figure 3. </label>
                    <caption>
                        <title>Precision, Recall, and F1-score comparison between OEDE and state-of-the-art ML models on Haberman&#x2019;s Cancer Dataset.</title>
                    </caption>
                    <graphic id="gr3" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/195381/4d34d116-592a-4f89-a5e6-e4b9d448aa71_figure3.gif"/>
                </fig>
            </sec>
            <sec id="sec16">
                <title>4.3 Thoracic surgery data</title>
                <p>The thoracic surgery dataset showed a significant imbalance in class distribution, challenging most of the classifiers that favour the majority classes. OEDE outperformed traditional ensemble models such as RF, LGBM, and BRF in terms of AUC and F1-score, as shown in 
                    <xref ref-type="table" rid="T5">
Table 5</xref> and 
                    <xref ref-type="fig" rid="f4">
Figure 4</xref>, when the original imbalanced dataset was used. Although many baseline models achieve relatively high accuracy, OEDE maintains its balanced performance, achieving higher recall for minority classes and offering better overall performance. With the SMOTE-balanced dataset, most models improve recall values owing to synthetic samples. OEDE continues to display superior performance metrics, especially the AUC value, which indicates the robustness of the model even when the dataset is balanced with synthetic data, without dropping precision or overfitting. Similar to the previous datasets, the performance of the baseline models fluctuated in the ADASYN-balanced dataset, but OEDE maintained a stable performance and showed high F1 and AUC scores. This generalizability and consistent performance are possible owing to the differential evolution-based weight optimization.</p>
                <table-wrap id="T5" orientation="portrait" position="float">
                    <label>
Table 5. </label>
                    <caption>
                        <title>AUC and Accuracy comparison of OEDE and state-of-the-art ML models on the Thoracic Surgery dataset.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="2" valign="top">Models</th>
                                <th align="left" colspan="2" rowspan="1" valign="top">Original imbalanced data</th>
                                <th align="left" colspan="2" rowspan="1" valign="top">SMOTE-Balanced data</th>
                                <th align="left" colspan="2" rowspan="1" valign="top">ADASYN-Balanced data</th>
                            </tr>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">Accuracy</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">AUC</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Accuracy</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">AUC</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Accuracy</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">
AUC</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">AB</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">80.14</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">47.15</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">71.63</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">44.14</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">73.05</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">50.11</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">BRF</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">81.56</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">50.48</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">77.31</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">47.61</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">75.89</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">46.19</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">ET</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">80.14</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">48.11</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">78.01</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">50.89</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">78.01</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">45.76</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">GB</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">82.98</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">56.29</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">76.6</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">54.93</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">78.01</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">50.04</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">LGBM</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">82.98</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">50.57</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">78.01</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">53.45</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">78.01</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">50.18</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">RF</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">79.43</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">50.82</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">76.6</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">47.86</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">79.43</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">48.15</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">XGB</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">82.98</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">50.82</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">76.6</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">44.23</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">74.47</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">43.38</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">CB</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">58.87</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">49.61</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">78.01</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">52.62</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">79.43</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">49.73</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">EE</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">57.45</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">57.59</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">71.63</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">44.14</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">72.34</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">47.19</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>OEDE</bold>
</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>84.4</bold>
</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>70.08</bold>
</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>81.56</bold>
</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>69.37</bold>
</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>79.43</bold>
</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>65.4</bold>
</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
                <fig fig-type="figure" id="f4" orientation="portrait" position="float">
                    <label>
Figure 4. </label>
                    <caption>
                        <title>Precision, Recall, and F1-score comparison between OEDE and state-of-the-art ML models on the Thoracic surgery dataset.</title>
                    </caption>
                    <graphic id="gr4" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/195381/4d34d116-592a-4f89-a5e6-e4b9d448aa71_figure4.gif"/>
                </fig>
            </sec>
            <sec id="sec17">
                <title>4.4 Cervical cancer risk dataset</title>
                <p>The Cervical Cancer Risk dataset poses a significant difficulty owing to its high class imbalance, where high-risk cases make up less than 7% of the dataset, which tends to degrade the performance of traditional models. On the imbalanced dataset, most baseline models, such as RF, ET, and CB, struggle with the minority class and show a low recall and F1-score, despite considerable accuracy, as shown in 
                    <xref ref-type="table" rid="T6">
Table 6</xref> and 
                    <xref ref-type="fig" rid="f5">
Figure 5</xref>. In contrast, OEDE is able to distinguish the minority class effectively owing to adaptive weighting, as reflected in the AUC and F1-score. Again, SMOTE improves the recall for all models, including OEDE, while maintaining competitive precision and achieving the highest AUC. For the ADASYN-balanced dataset, OEDE maintained high performance with the highest AUC and F1-score, supporting its resilience and generalizability.</p>
                <table-wrap id="T6" orientation="portrait" position="float">
                    <label>
Table 6. </label>
                    <caption>
                        <title>AUC and Accuracy comparison of OEDE and state-of-the-art ML models on the cervical cancer risk dataset.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="2" valign="top">Models</th>
                                <th align="left" colspan="2" rowspan="1" valign="top">Original imbalanced data</th>
                                <th align="left" colspan="2" rowspan="1" valign="top">SMOTE-Balanced data</th>
                                <th align="left" colspan="2" rowspan="1" valign="top">ADASYN-Balanced data</th>
                            </tr>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">Accuracy</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">AUC</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Accuracy</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">AUC</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Accuracy</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">
AUC</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">AB</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">93.57</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">81.45</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">94.96</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">89.22</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">94.19</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">92.24</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">BRF</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">94.35</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">94.94</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">95.35</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">96.3</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">94.35</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">95.47</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">ET</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">94.35</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">92.27</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">95.35</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">89.33</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">94.57</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">88.31</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">GB</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">94.35</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">94.13</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">95.35</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">96.38</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">94.74</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">95.02</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">LGBM</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">94.74</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">94.05</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">94.96</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">94.29</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">94.96</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">93.03</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">RF</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">94.74</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">94.27</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">94.96</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">94.6</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">94.35</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">94.05</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">XGB</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">94.35</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">93.42</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">94.74</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">96.21</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">94.35</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">96.08</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">CB</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">92.8</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">95.17</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">94.35</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">97.23</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">94.35</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">96.89</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">EE</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">92.8</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">95.12</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">94.96</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">89.22</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">94.19</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">92.24</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>OEDE</bold>
</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>95.35</bold>
</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>96.73</bold>
</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>95.19</bold>
</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>97.19</bold>
</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>94.96</bold>
</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>97.89</bold>
</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
                <fig fig-type="figure" id="f5" orientation="portrait" position="float">
                    <label>
Figure 5. </label>
                    <caption>
                        <title>Precision, Recall, and F1-score comparison between OEDE and state-of-the-art ML models on the Cervical Cancer Risk Dataset.</title>
                    </caption>
                    <graphic id="gr5" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/195381/4d34d116-592a-4f89-a5e6-e4b9d448aa71_figure5.gif"/>
                </fig>
                <p>The box plot overlaid with swarm points (
                    <xref ref-type="fig" rid="f6">
Figure 6</xref>) demonstrates the diversity of the different datasets. In terms of the median and IQR of AUC, the Cervical dataset presented the highest AUC scores (~0.95 median), demonstrating excellent and stable classification performance. Even for the Pima dataset, a moderate spread was observed, indicating reasonable generalization for the model. The Haberman and Thoracic datasets, on the other hand, demonstrated lower and more variable AUCs, around a median of 0.66&#x2013;0.68, possibly reflecting difficulties with class imbalance or limited separability.</p>
                <fig fig-type="figure" id="f6" orientation="portrait" position="float">
                    <label>
Figure 6. </label>
                    <caption>
                        <title>AUC distribution across datasets.</title>
                    </caption>
                    <graphic id="gr6" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/195381/4d34d116-592a-4f89-a5e6-e4b9d448aa71_figure6.gif"/>
                </fig>
                <p>The performance of the OEDE across the four benchmark datasets demonstrates its robustness and adaptability across balanced and imbalanced datasets. Despite varying levels of class imbalance, OEDE achieved superior performance, outperforming a range of traditional baseline models. To support the numerical findings, ROC curves were plotted (
                    <xref ref-type="fig" rid="f7">
Figure 7</xref>) for all the datasets to visually represent the discrimination ability of the model. Consistently across all datasets, the ROC curve of OEDE was favourable, aligning with the AUC values achieved.</p>
                <fig fig-type="figure" id="f7" orientation="portrait" position="float">
                    <label>
Figure 7. </label>
                    <caption>
                        <title>ROC plots of all the models on different datasets.</title>
                        <p>

                            <bold>(A)</bold> Pima Indians Diabetes Dataset. 
                            <bold>(B)</bold> Haberman&#x2019;s Cancer Dataset. 
                            <bold>(C)</bold> Thoracic Surgery Dataset. 
                            <bold>(D)</bold> Cervical Cancer Risk Dataset.</p>
                    </caption>
                    <graphic id="gr7a" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/195381/4d34d116-592a-4f89-a5e6-e4b9d448aa71_figure7a.gif"/>
                    <graphic id="gr7b" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/195381/4d34d116-592a-4f89-a5e6-e4b9d448aa71_figure7b.gif"/>
                </fig>
            </sec>
        </sec>
        <sec id="sec18" sec-type="conclusion">
            <title>5. Conclusion</title>
            <p>In this study, a novel ensemble model, OEDE, was proposed to address a methodological gap in medical classification and the challenges of imbalanced medical datasets. Traditional ensemble techniques rely on static weight initialization and surrogate loss functions and thus fail to optimize non-differentiable, clinically relevant metrics. The key innovation is an ensemble framework that integrates the predictive power of diverse classifiers such as LR, RF, and XGB through a differential evolution-based ensemble, which learns the optimal weights by maximizing the AUC. Unlike traditional models, OEDE adapts the decision boundary to enhance the discrimination capacity, especially in the minority class. The approach also includes class-balanced learning, ensuring that base models reduce the imbalance at the source, resulting in a robust ensemble model that generalizes well across datasets with different characteristics and imbalance ratios. The performance of OEDE was assessed on four different medical datasets with class imbalance ratios from 1.89 to 14.6 to verify its ability to handle class-imbalanced data. The results demonstrated that OEDE almost always performed significantly better than the state-of-the-art ML models in terms of AUC, accuracy, and F1-score, and the model was robust under different data balancing techniques such as SMOTE and ADASYN. The ROC curves of all the models and datasets also confirm the superior separability of OEDE, making it a useful framework for practical real-world classification problems.</p>
        </sec>
        <sec id="sec20">
            <title>Ethical approval</title>
            <p>This study did not require ethical approval because it used only publicly accessible, de-identified datasets. There were no experiments with human subjects, and no new data were gathered. The datasets do not contain any personally identifiable information and are available from recognized open repositories.</p>
        </sec>
    </body>
    <back>
        <sec id="sec23" sec-type="data-availability">
            <title>Data availability</title>
            <p>The datasets used in this research are publicly available from recognized data repositories and can be accessed through the following links. The Pima Indians Diabetes Dataset, originally hosted on the UCI ML Repository, is no longer available there. However, it can be accessed via Mendeley Data.
                <list list-type="bullet">
                    <list-item>
                        <label>&#x2022;</label>
                        <p>UCI Machine Learning Repository. Haberman&#x2019;s Survival Dataset. DOI: 
                            <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.24432/C5XK51">10.24432/C5XK51</ext-link>
                        </p>
                    </list-item>
                    <list-item>
                        <label>&#x2022;</label>
                        <p>UCI Machine Learning Repository. Thoracic Surgery Data. DOI: 
                            <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.24432/C5Z60N">10.24432/C5Z60N</ext-link>
                        </p>
                    </list-item>
                    <list-item>
                        <label>&#x2022;</label>
                        <p>UCI Machine Learning Repository. Cervical Cancer Dataset. DOI: 
                            <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.24432/C5Z310">10.24432/C5Z310</ext-link>
                        </p>
                    </list-item>
                    <list-item>
                        <label>&#x2022;</label>
                        <p>Mendeley Data. Pima Indians Diabetes Dataset. DOI: 
                            <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.17632/7zcc8v6hvp.1">10.17632/7zcc8v6hvp.1</ext-link>
                        </p>
                    </list-item>
                </list>
            </p>
            <p>All the data is publicly available under the terms of the 
                <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">

                    <bold>Creative Commons Attribution 4.0 International</bold>
</ext-link> 
                <bold>
(CC BY 4.0)</bold>.</p>
        </sec>
        <ref-list>
            <title>References</title>
            <ref id="ref1">
                <label>1</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Kolasa</surname>
                            <given-names>K</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Admassu</surname>
                            <given-names>B</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ho&#x0142;ownia-Voloskova</surname>
                            <given-names>M</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Systematic reviews of machine learning in healthcare: a literature review.</article-title>
                    <source>

                        <italic toggle="yes">Expert Rev. Pharmacoecon. Outcomes Res.</italic>
</source>
                    <year>Jan. 2024</year>;<volume>24</volume>(<issue>1</issue>):<fpage>63</fpage>&#x2013;<lpage>115</lpage>.
                    <pub-id pub-id-type="doi">10.1080/14737167.2023.2279107</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref2">
                <label>2</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Zhang</surname>
                            <given-names>H</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Gerych</surname>
                            <given-names>W</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ghassemi</surname>
                            <given-names>M</given-names>
                        </name>
</person-group>:
                    <article-title>A data-centric perspective to fair machine learning for healthcare.</article-title>
                    <source>

                        <italic toggle="yes">Nature Reviews Methods Primers.</italic>
</source>
                    <year>Nov. 2024</year>;<volume>4</volume>(<issue>1</issue>):<fpage>86</fpage>.
                    <pub-id pub-id-type="doi">10.1038/s43586-024-00371-x</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref3">
                <label>3</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Roy</surname>
                            <given-names>D</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Roy</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Roy</surname>
                            <given-names>U</given-names>
                        </name>
</person-group>:
                    <article-title>Learning from Imbalanced Data in Healthcare: State-of-the-Art and Research Challenges.</article-title>
                    <year>2024</year>;<fpage>19</fpage>&#x2013;<lpage>32</lpage>.
                    <pub-id pub-id-type="doi">10.1007/978-981-99-8853-2_2</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref4">
                <label>4</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Kosolwattana</surname>
                            <given-names>T</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Liu</surname>
                            <given-names>C</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Hu</surname>
                            <given-names>R</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>A self-inspected adaptive SMOTE algorithm (SASMOTE) for highly imbalanced data classification in healthcare.</article-title>
                    <source>

                        <italic toggle="yes">BioData Min.</italic>
</source>
                    <year>Apr. 2023</year>;<volume>16</volume>(<issue>1</issue>):<fpage>15</fpage>.
                    <pub-id pub-id-type="doi">10.1186/s13040-023-00330-4</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref5">
                <label>5</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Khan</surname>
                            <given-names>TM</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Xu</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Khan</surname>
                            <given-names>ZG</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Implementing Multilabeling, ADASYN, and ReliefF Techniques for Classification of Breast Cancer Diagnostic through Machine Learning: Efficient Computer-Aided Diagnostic System.</article-title>
                    <source>

                        <italic toggle="yes">J. Healthc. Eng.</italic>
</source>
                    <year>Mar. 2021</year>;<volume>2021</volume>:<fpage>1</fpage>&#x2013;<lpage>15</lpage>.
                    <pub-id pub-id-type="doi">10.1155/2021/5577636</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref6">
                <label>6</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Mahajan</surname>
                            <given-names>P</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Uddin</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Hajati</surname>
                            <given-names>F</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Ensemble Learning for Disease Prediction: A Review.</article-title>
                    <source>

                        <italic toggle="yes">Healthcare.</italic>
</source>
                    <year>Jun. 2023</year>;<volume>11</volume>(<issue>12</issue>):<fpage>1808</fpage>.
                    <pub-id pub-id-type="doi">10.3390/healthcare11121808</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref13">
                <label>7</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Dutta</surname>
                            <given-names>A</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Early Prediction of Diabetes Using an Ensemble of Machine Learning Models.</article-title>
                    <source>

                        <italic toggle="yes">Int. J. Environ. Res. Public Health.</italic>
</source>
                    <year>Oct. 2022</year>;<volume>19</volume>(<issue>19</issue>).
                    <pub-id pub-id-type="doi">10.3390/ijerph191912378</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref17">
                <label>8</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Abdellatif</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Abdellatef</surname>
                            <given-names>H</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Kanesan</surname>
                            <given-names>J</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Improving the Heart Disease Detection and Patients&#x2019; Survival Using Supervised Infinite Feature Selection and Improved Weighted Random Forest.</article-title>
                    <source>

                        <italic toggle="yes">IEEE Access.</italic>
</source>
                    <year>2022</year>;<volume>10</volume>:<fpage>67363</fpage>&#x2013;<lpage>67372</lpage>.
                    <pub-id pub-id-type="doi">10.1109/ACCESS.2022.3185129</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref20">
                <label>9</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Liu</surname>
                            <given-names>L</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Wu</surname>
                            <given-names>X</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Li</surname>
                            <given-names>S</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Solving the class imbalance problem using ensemble algorithm: application of screening for aortic dissection.</article-title>
                    <source>

                        <italic toggle="yes">BMC Med. Inform. Decis. Mak.</italic>
</source>
                    <year>Dec. 2022</year>;<volume>22</volume>(<issue>1</issue>).
                    <pub-id pub-id-type="doi">10.1186/s12911-022-01821-w</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref26">
                <label>10</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Mienye</surname>
                            <given-names>ID</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Sun</surname>
                            <given-names>Y</given-names>
                        </name>
</person-group>:
                    <article-title>Performance analysis of cost-sensitive learning methods with application to imbalanced medical data.</article-title>
                    <source>

                        <italic toggle="yes">Inform. Med. Unlocked.</italic>
</source>
                    <year>Jan. 2021</year>;<volume>25</volume>.
                    <pub-id pub-id-type="doi">10.1016/j.imu.2021.100690</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref7">
                <label>11</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Dey</surname>
                            <given-names>I</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Pratap</surname>
                            <given-names>V</given-names>
                        </name>
</person-group>:
                    <chapter-title>A Comparative Study of SMOTE, Borderline-SMOTE, and ADASYN Oversampling Techniques using Different Classifiers.</chapter-title>
                    <source>

                        <italic toggle="yes">Proceedings - 2023 3rd International Conference on Smart Data Intelligence, ICSMDI 2023.</italic>
</source>
                    <publisher-name>Institute of Electrical and Electronics Engineers Inc.</publisher-name>;<year>2023</year>; pp.<fpage>294</fpage>&#x2013;<lpage>302</lpage>.
                    <pub-id pub-id-type="doi">10.1109/ICSMDI57622.2023.00060</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref8">
                <label>12</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Chen</surname>
                            <given-names>TCT</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Wu</surname>
                            <given-names>HC</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Chiu</surname>
                            <given-names>MC</given-names>
                        </name>
</person-group>:
                    <article-title>A deep neural network with modified random forest incremental interpretation approach for diagnosing diabetes in smart healthcare.</article-title>
                    <source>

                        <italic toggle="yes">Appl. Soft Comput.</italic>
</source>
                    <year>Feb. 2024</year>;<volume>152</volume>.
                    <pub-id pub-id-type="doi">10.1016/j.asoc.2023.111183</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref9">
                <label>13</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Martinez-Velasco</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Mart&#x00ed;nez-Villase&#x00f1;or</surname>
                            <given-names>L</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Miralles-Pechu&#x00e1;n</surname>
                            <given-names>L</given-names>
                        </name>
</person-group>:
                    <article-title>Addressing Class Imbalance in Healthcare Data: Machine Learning Solutions for Age-Related Macular Degeneration and Preeclampsia.</article-title>
                    <source>

                        <italic toggle="yes">IEEE Latin America Transactions.</italic>
</source>
                    <year>2024</year>.
                    <ext-link ext-link-type="uri" xlink:href="https://latamt.ieeer9.org/index.php/transactions/article/view/8952">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref10">
                <label>14</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Agyemang</surname>
                            <given-names>EF</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Addressing Class Imbalance Problem in Health Data Classification: Practical Application From an Oversampling Viewpoint.</article-title>
                    <source>

                        <italic toggle="yes">Applied Computational Intelligence and Soft Computing.</italic>
</source>
                    <year>2025</year>;<volume>1</volume>:<fpage>2025</fpage>.
                    <pub-id pub-id-type="doi">10.1155/acis/1013769</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref11">
                <label>15</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Abayomi-Alli</surname>
                            <given-names>OO</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Dama&#x0161;evi&#x010d;ius</surname>
                            <given-names>R</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Maskeli&#x016b;nas</surname>
                            <given-names>R</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>An Ensemble Learning Model for COVID-19 Detection from Blood Test Samples.</article-title>
                    <source>

                        <italic toggle="yes">Sensors.</italic>
</source>
                    <year>Mar. 2022</year>;<volume>22</volume>(<issue>6</issue>).
                    <pub-id pub-id-type="doi">10.3390/s22062224</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref12">
                <label>16</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Elgendy</surname>
                            <given-names>IA</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Hosny</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Albashrawi</surname>
                            <given-names>MA</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Dual-stage explainable ensemble learning model for diabetes diagnosis.</article-title>
                    <source>

                        <italic toggle="yes">Expert Syst. Appl.</italic>
</source>
                    <year>May 2025</year>;<volume>274</volume>.
                    <pub-id pub-id-type="doi">10.1016/j.eswa.2025.126899</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref14">
                <label>17</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Alzakari</surname>
                            <given-names>SA</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Enhanced heart disease prediction in remote healthcare monitoring using IoT-enabled cloud-based XGBoost and Bi-LSTM.</article-title>
                    <source>

                        <italic toggle="yes">Alex. Eng. J.</italic>
</source>
                    <year>Oct. 2024</year>;<volume>105</volume>:<fpage>280</fpage>&#x2013;<lpage>291</lpage>.
                    <pub-id pub-id-type="doi">10.1016/j.aej.2024.06.036</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref15">
                <label>18</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Das</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Nayak</surname>
                            <given-names>SP</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Sahoo</surname>
                            <given-names>B</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <chapter-title>Evaluating Ensemble Models on Imbalanced Data Sets: A Comparative Study across Varied Minority Class Ratios.</chapter-title>
                    <source>

                        <italic toggle="yes">ESIC 2024-4th International Conference on Emerging Systems and Intelligent Computing, Proceedings.</italic>
</source>
                    <publisher-name>Institute of Electrical and Electronics Engineers Inc.</publisher-name>;<year>2024</year>; pp.<fpage>774</fpage>&#x2013;<lpage>779</lpage>.
                    <pub-id pub-id-type="doi">10.1109/ESIC60604.2024.10481583</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref16">
                <label>19</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Senthilvadivu</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ramesh</surname>
                            <given-names>PS</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Narang</surname>
                            <given-names>S</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <chapter-title>Impact of Random Forest and XGBoost Algorithms on Improving Patient Outcomes Compared to Standard Decision-Making Methods in Healthcare Predictive Analytics.</chapter-title>
                    <source>

                        <italic toggle="yes">2024 International Conference on Cybernation and Computation, CYBERCOM 2024.</italic>
</source>
                    <publisher-name>Institute of Electrical and Electronics Engineers Inc.</publisher-name>;<year>2024</year>; pp.<fpage>694</fpage>&#x2013;<lpage>699</lpage>.
                    <pub-id pub-id-type="doi">10.1109/CYBERCOM63683.2024.10803246</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref18">
                <label>20</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Wu</surname>
                            <given-names>Y</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Novel binary logistic regression model based on feature transformation of XGBoost for type 2 Diabetes Mellitus prediction in healthcare systems.</article-title>
                    <source>

                        <italic toggle="yes">Future Gener. Comput. Syst.</italic>
</source>
                    <month>Apr.</month> <year>2022</year>;<volume>129</volume>:<fpage>1</fpage>&#x2013;<lpage>12</lpage>.
                    <pub-id pub-id-type="doi">10.1016/j.future.2021.11.003</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref19">
                <label>21</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Abnoosian</surname>
                            <given-names>K</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Farnoosh</surname>
                            <given-names>R</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Behzadi</surname>
                            <given-names>MH</given-names>
                        </name>
</person-group>:
                    <article-title>Prediction of diabetes disease using an ensemble of machine learning multi-classifier models.</article-title>
                    <source>

                        <italic toggle="yes">BMC Bioinformatics.</italic>
</source>
                    <month>Dec.</month> <year>2023</year>;<volume>24</volume>(<issue>1</issue>).
                    <pub-id pub-id-type="doi">10.1186/s12859-023-05465-z</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref21">
                <label>22</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Masruriyah</surname>
                            <given-names>AFN</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Novita</surname>
                            <given-names>HY</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Sukmawati</surname>
                            <given-names>CE</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <chapter-title>Thorough Evaluation of the Effectiveness of SMOTE and ADASYN Oversampling Methods in Enhancing Supervised Learning Performance for Imbalanced Heart Disease Datasets.</chapter-title>
                    <source>

                        <italic toggle="yes">2023 8th International Conference on Informatics and Computing, ICIC 2023.</italic>
</source>
                    <publisher-name>Institute of Electrical and Electronics Engineers Inc.</publisher-name>;<year>2023</year>.
                    <pub-id pub-id-type="doi">10.1109/ICIC60109.2023.10382105</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref22">
                <label>23</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Orme&#x00f1;o-Arriagada</surname>
                            <given-names>P</given-names>
                        </name>

                        <name name-style="western">
                            <surname>M&#x00e1;rquez</surname>
                            <given-names>G</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Araya</surname>
                            <given-names>D</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Applying Machine Learning Sampling Techniques to Address Data Imbalance in a Chilean COVID-19 Symptoms and Comorbidities Dataset.</article-title>
                    <source>

                        <italic toggle="yes">Applied Sciences (Switzerland).</italic>
</source>
                    <month>Feb.</month> <year>2025</year>;<volume>15</volume>(<issue>3</issue>).
                    <pub-id pub-id-type="doi">10.3390/app15031132</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref23">
                <label>24</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Chowdhury</surname>
                            <given-names>LH</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Tabassum</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Shatabda</surname>
                            <given-names>S</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>An optimized data analytics pipeline for improving healthcare diagnosis using ensemble learning.</article-title>
                    <source>

                        <italic toggle="yes">Inform. Med. Unlocked.</italic>
</source>
                    <month>Jan.</month> <year>2025</year>;<volume>53</volume>.
                    <pub-id pub-id-type="doi">10.1016/j.imu.2025.101623</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref24">
                <label>25</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Prithula</surname>
                            <given-names>J</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Improved pediatric ICU mortality prediction for respiratory diseases: machine learning and data subdivision insights.</article-title>
                    <source>

                        <italic toggle="yes">Respir. Res.</italic>
</source>
                    <month>Dec.</month> <year>2024</year>;<volume>25</volume>(<issue>1</issue>).
                    <pub-id pub-id-type="doi">10.1186/s12931-024-02753-x</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref25">
                <label>26</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Chowdhury</surname>
                            <given-names>MM</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ayon</surname>
                            <given-names>RS</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Hossain</surname>
                            <given-names>MS</given-names>
                        </name>
</person-group>:
                    <article-title>An investigation of machine learning algorithms and data augmentation techniques for diabetes diagnosis using class imbalanced BRFSS dataset.</article-title>
                    <source>

                        <italic toggle="yes">Healthcare Analytics.</italic>
</source>
                    <month>Jun.</month> <year>2024</year>;<volume>5</volume>.
                    <pub-id pub-id-type="doi">10.1016/j.health.2023.100297</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref27">
                <label>27</label>
                <mixed-citation publication-type="other">
                    <collab>UCI MACHINE LEARNING</collab>:
                    <article-title>Pima Indians Diabetes Database.</article-title>
                    <source>

                        <italic toggle="yes">Version 1.</italic>
</source>Accessed: Feb. 09, 2025.
                    <ext-link ext-link-type="uri" xlink:href="https://archive.ics.uci.edu/ml/datasets/pima+indians+diabetes">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref28">
                <label>28</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Haberman</surname>
                            <given-names>S</given-names>
                        </name>
</person-group>:
                    <article-title>Haberman&#x2019;s Survival.</article-title>
                    <year>1976</year>.
                    <pub-id pub-id-type="doi">10.24432/C5XK51</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref29">
                <label>29</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

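                        <name name-style="western">
                            <surname>Lubicz</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Pawelczyk</surname>
                            <given-names>K</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Rzechonek</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Kolodziej</surname>
                            <given-names>J</given-names>
                        </name>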
</person-group>:
                    <article-title>Thoracic Surgery Data.</article-title>
                    <year>2014</year>.
                    <pub-id pub-id-type="doi">10.24432/C5Z60N</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref30">
                <label>30</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Fernandes</surname>
                            <given-names>K</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Cardoso</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Fernandes</surname>
                            <given-names>J</given-names>
                        </name>
</person-group>:
                    <article-title>Cervical Cancer (Risk Factors).</article-title>
                    <year>2017</year>.
                    <pub-id pub-id-type="doi">10.24432/C5Z310</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref31">
                <label>31</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Song</surname>
                            <given-names>Y</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Comparison of logistic regression and machine learning methods for predicting postoperative delirium in elderly patients: A retrospective study.</article-title>
                    <source>

                        <italic toggle="yes">CNS Neurosci. Ther.</italic>
</source>
                    <month>Jan.</month> <year>2023</year>;<volume>29</volume>(<issue>1</issue>):<fpage>158</fpage>&#x2013;<lpage>167</lpage>.
                    <pub-id pub-id-type="doi">10.1111/cns.13991</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref32">
                <label>32</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Rahmatinejad</surname>
                            <given-names>Z</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>A comparative study of explainable ensemble learning and logistic regression for predicting in-hospital mortality in the emergency department.</article-title>
                    <source>

                        <italic toggle="yes">Sci. Rep.</italic>
</source>
                    <month>Feb.</month> <year>2024</year>;<volume>14</volume>(<issue>1</issue>):<fpage>3406</fpage>.
                    <pub-id pub-id-type="doi">10.1038/s41598-024-54038-4</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref33">
                <label>33</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Rajendra</surname>
                            <given-names>P</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Latifi</surname>
                            <given-names>S</given-names>
                        </name>
</person-group>:
                    <article-title>Prediction of diabetes using logistic regression and ensemble techniques.</article-title>
                    <source>

                        <italic toggle="yes">Computer Methods and Programs in Biomedicine Update.</italic>
</source>
                    <year>2021</year>;<volume>1</volume>:<fpage>100032</fpage>.
                    <pub-id pub-id-type="doi">10.1016/j.cmpbup.2021.100032</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref34">
                <label>34</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Nagarajan</surname>
                            <given-names>SM</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Muthukumaran</surname>
                            <given-names>V</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Murugesan</surname>
                            <given-names>R</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Feature selection model for healthcare analysis and classification using classifier ensemble technique.</article-title>
                    <source>

                        <italic toggle="yes">Int. J. Syst. Assur. Eng. Manag.</italic>
</source>
                    <month>May</month> <year>2021</year>.
                    <pub-id pub-id-type="doi">10.1007/s13198-021-01126-7</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref35">
                <label>35</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Nguyen</surname>
                            <given-names>D-K</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Lan</surname>
                            <given-names>C-H</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Chan</surname>
                            <given-names>C-L</given-names>
                        </name>
</person-group>:
                    <article-title>Deep Ensemble Learning Approaches in Healthcare to Enhance the Prediction and Diagnosing Performance: The Workflows, Deployments, and Surveys on the Statistical, Image-Based, and Sequential Datasets.</article-title>
                    <source>

                        <italic toggle="yes">Int. J. Environ. Res. Public Health.</italic>
</source>
                    <year>2021</year>;<volume>18</volume>:<fpage>10811</fpage>.
                    <pub-id pub-id-type="doi">10.3390/ijerph182010811</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref36">
                <label>36</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Das</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Nayak</surname>
                            <given-names>SP</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Sahoo</surname>
                            <given-names>B</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Machine Learning in Healthcare Analytics: A State-of-the-Art Review.</article-title>
                    <source>

                        <italic toggle="yes">Archives of Computational Methods in Engineering.</italic>
</source>
                    <month>Apr.</month> <year>2024</year>.
                    <pub-id pub-id-type="doi">10.1007/s11831-024-10098-3</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref37">
                <label>37</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Verma</surname>
                            <given-names>I</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Prasad</surname>
                            <given-names>SK</given-names>
                        </name>
</person-group>:
                    <article-title>Exploring Ensemble Learning Techniques for Infant Mortality Prediction: A Technical Analysis of XGBoost Stacking AdaBoost and Bagging Models.</article-title>
                    <source>

                        <italic toggle="yes">Birth Defects Res.</italic>
</source>
                    <month>Feb.</month> <year>2025</year>;<volume>117</volume>(<issue>2</issue>).
                    <pub-id pub-id-type="doi">10.1002/bdr2.2443</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref39">
                <label>38</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Imani</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Beikmohammadi</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Arabnia</surname>
                            <given-names>HR</given-names>
                        </name>
</person-group>:
                    <article-title>Comprehensive Analysis of Random Forest and XGBoost Performance with SMOTE, ADASYN, and GNUS Under Varying Imbalance Levels.</article-title>
                    <source>

                        <italic toggle="yes">Technologies (Basel).</italic>
</source>
                    <month>Mar.</month> <year>2025</year>;<volume>13</volume>(<issue>3</issue>).
                    <pub-id pub-id-type="doi">10.3390/technologies13030088</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref40">
                <label>39</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Kabir</surname>
                            <given-names>MA</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ahmed</surname>
                            <given-names>MU</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Begum</surname>
                            <given-names>S</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <chapter-title>Balancing Fairness: Unveiling the Potential of SMOTE-Driven Oversampling in AI Model Enhancement.</chapter-title>
                    <source>

                        <italic toggle="yes">ACM International Conference Proceeding Series.</italic>
</source>
                    <publisher-name>Association for Computing Machinery</publisher-name>;<month>May</month> <year>2024</year>; pp.<fpage>21</fpage>&#x2013;<lpage>29</lpage>.
                    <pub-id pub-id-type="doi">10.1145/3674029.3674034</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref41">
                <label>40</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Mohammedqasim</surname>
                            <given-names>H</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ahmed Jasim</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Mohammedqasem</surname>
                            <given-names>A</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Enhancing Predictive Performance in COVID-19 Healthcare Datasets: A Case Study Based on Hyper ADASYN Over-Sampling and Genetic Feature Selection.</article-title>
                    <year>2024</year>.</mixed-citation>
            </ref>
            <ref id="ref42">
                <label>41</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Al-Shehari</surname>
                            <given-names>T</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Comparative evaluation of data imbalance addressing techniques for CNN-based insider threat detection.</article-title>
                    <source>

                        <italic toggle="yes">Sci. Rep.</italic>
</source>
                    <month>Dec.</month> <year>2024</year>;<volume>14</volume>(<issue>1</issue>).
                    <pub-id pub-id-type="doi">10.1038/s41598-024-73510-9</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref43">
                <label>42</label>
                <mixed-citation publication-type="book">
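                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Feoktistov</surname>
                            <given-names>V</given-names>
                        </name>
</person-group>:
                    <chapter-title>Differential Evolution.</chapter-title>
                    <source>

                        <italic toggle="yes">Differential Evolution: In Search of Solutions.</italic>
</source>
                    <publisher-loc>Boston, MA</publisher-loc>:
                    <publisher-name>Springer US</publisher-name>;<year>2006</year>; pp.<fpage>1</fpage>&#x2013;<lpage>24</lpage>.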
                    <pub-id pub-id-type="doi">10.1007/978-0-387-36896-2_1</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref44">
                <label>43</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Ahmad</surname>
                            <given-names>MF</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Isa</surname>
                            <given-names>NAM</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Lim</surname>
                            <given-names>WH</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Differential evolution: A recent review based on state-of-the-art works.</article-title>
                    <source>

                        <italic toggle="yes">Alex. Eng. J.</italic>
</source>
                    <month>May</month> <year>2022</year>;<volume>61</volume>(<issue>5</issue>):<fpage>3831</fpage>&#x2013;<lpage>3872</lpage>.
                    <pub-id pub-id-type="doi">10.1016/j.aej.2021.09.013</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref45">
                <label>44</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Song</surname>
                            <given-names>Y</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Dynamic hybrid mechanism-based differential evolution algorithm and its application.</article-title>
                    <source>

                        <italic toggle="yes">Expert Syst. Appl.</italic>
</source>
                    <month>Mar.</month> <year>2023</year>;<volume>213</volume>:<fpage>118834</fpage>.
                    <pub-id pub-id-type="doi">10.1016/j.eswa.2022.118834</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref46">
                <label>45</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Zhang</surname>
                            <given-names>SX</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Liu</surname>
                            <given-names>YH</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Zheng</surname>
                            <given-names>LM</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Differential evolution with collective ensemble learning.</article-title>
                    <source>

                        <italic toggle="yes">Swarm Evol. Comput.</italic>
</source>
                    <month>Jun.</month> <year>2024</year>;<volume>87</volume>:<fpage>101521</fpage>.
                    <pub-id pub-id-type="doi">10.1016/j.swevo.2024.101521</pub-id>
                </mixed-citation>
            </ref>
        </ref-list>
    </back>
    <sub-article article-type="reviewer-report" id="report453315">
        <front-stub>
            <article-id pub-id-type="doi">10.5256/f1000research.195381.r453315</article-id>
            <title-group>
                <article-title>Reviewer response for version 2</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author">
                    <name>
                        <surname>Ikerionwu</surname>
                        <given-names>Charles</given-names>
                    </name>
                    <xref ref-type="aff" rid="r453315a1">1</xref>
                    <role>Referee</role>
                    <uri content-type="orcid">https://orcid.org/0000-0002-9946-6307</uri>
                </contrib>
                <aff id="r453315a1">
                    <label>1</label>Federal University of Technology, Owerri, Nigeria</aff>
            </contrib-group>
            <author-notes>
                <fn fn-type="conflict">
                    <p>
                        <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>11</day>
                <month>2</month>
                <year>2026</year>
            </pub-date>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2026 Ikerionwu C</copyright-statement>
                <copyright-year>2026</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access peer review report distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <related-article ext-link-type="doi" id="relatedArticleReport453315" related-article-type="peer-reviewed-article" xlink:href="10.12688/f1000research.169456.2"/>
            <custom-meta-group>
                <custom-meta>
                    <meta-name>recommendation</meta-name>
                    <meta-value>approve</meta-value>
                </custom-meta>
            </custom-meta-group>
        </front-stub>
        <body>
            <p>The authors have effected the corrections, and the paper can be accepted in its current form.</p>
            <p>Is the work clearly and accurately presented and does it cite the current literature?</p>
            <p>Yes</p>
            <p>If applicable, is the statistical analysis and its interpretation appropriate?</p>
            <p>Partly</p>
            <p>Are all the source data underlying the results available to ensure full reproducibility?</p>
            <p>Yes</p>
            <p>Is the study design appropriate and is the work technically sound?</p>
            <p>Yes</p>
            <p>Are the conclusions drawn adequately supported by the results?</p>
            <p>No</p>
            <p>Are sufficient details of methods and analysis provided to allow replication by others?</p>
            <p>Partly</p>
            <p>Reviewer Expertise:</p>
            <p>Application of AI/ML in multidisciplinary research such as agriculture, software engineering, health and energy. Software process improvement and data science.</p>
            <p>I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard.</p>
        </body>
    </sub-article>
    <sub-article article-type="reviewer-report" id="report443187">
        <front-stub>
            <article-id pub-id-type="doi">10.5256/f1000research.186799.r443187</article-id>
            <title-group>
                <article-title>Reviewer response for version 1</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author">
                    <name>
                        <surname>Ikerionwu</surname>
                        <given-names>Charles</given-names>
                    </name>
                    <xref ref-type="aff" rid="r443187a1">1</xref>
                    <role>Referee</role>
                    <uri content-type="orcid">https://orcid.org/0000-0002-9946-6307</uri>
                </contrib>
                <aff id="r443187a1">
                    <label>1</label>Federal University of Technology, Owerri, Nigeria</aff>
            </contrib-group>
            <author-notes>
                <fn fn-type="conflict">
                    <p>
                        <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>5</day>
                <month>1</month>
                <year>2026</year>
            </pub-date>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2026 Ikerionwu C</copyright-statement>
                <copyright-year>2026</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access peer review report distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <related-article ext-link-type="doi" id="relatedArticleReport443187" related-article-type="peer-reviewed-article" xlink:href="10.12688/f1000research.169456.1"/>
            <custom-meta-group>
                <custom-meta>
                    <meta-name>recommendation</meta-name>
                    <meta-value>approve-with-reservations</meta-value>
                </custom-meta>
            </custom-meta-group>
        </front-stub>
        <body>
            <p>Abstract:&#x00a0;</p>
            <p> Improve the quantification of the result.</p>
            <p> </p>
            <p> </p>
            <p> Body of the work:</p>
            <p> Numbering of Equation is preferably done on the right-hand side.</p>
            <p> </p>
            <p> The authors appear to establish a gap using this sentence:&#x00a0;</p>
            <p> "Some of these studies enhanced the performance to some extent, but they were not flexible in assigning weights based on the weights and did not directly optimize the corresponding relevant evaluation measure." However, the sentence could not clearly establish the gap the study is pursuing.&#x00a0;</p>
            <p> Supporting the gap with a citation would present the gap clearly.</p>
            <p> </p>
            <p> What type of gap has been identified? - methodology, knowledge, etc. In the conclusion, no reference was made to the findings and how it closed the gap.&#x00a0;</p>
            <p> </p>
            <p> In addressing imbalanced datasets, SMOTE and ADASYN have been used in earlier research, yet this study claims it is novel.&#x00a0;</p>
            <p> </p>
            <p> The use of&#x00a0;Logistic Regression (LR), Random Forest (RF), and XGBoost (XGB) has been adopted in earlier research. The same is DE.&#x00a0;</p>
            <p> </p>
            <p> Thus, the main contributions of this study as listed in section 1 should be revisited.&#x00a0;</p>
            <p> </p>
            <p> </p>
            <p> </p>
            <p>Is the work clearly and accurately presented and does it cite the current literature?</p>
            <p>Yes</p>
            <p>If applicable, is the statistical analysis and its interpretation appropriate?</p>
            <p>Partly</p>
            <p>Are all the source data underlying the results available to ensure full reproducibility?</p>
            <p>Yes</p>
            <p>Is the study design appropriate and is the work technically sound?</p>
            <p>Yes</p>
            <p>Are the conclusions drawn adequately supported by the results?</p>
            <p>No</p>
            <p>Are sufficient details of methods and analysis provided to allow replication by others?</p>
            <p>Partly</p>
            <p>Reviewer Expertise:</p>
            <p>Application of AI in multidisciplinary research such as agriculture, software engineering, health and energy. Software process improvement and data science.</p>
            <p>I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard, however I have significant reservations, as outlined above.</p>
        </body>
        <sub-article article-type="response" id="comment15284-443187">
            <front-stub>
                <contrib-group>
                    <contrib contrib-type="author">
                        <name>
                            <surname>Sahoo</surname>
                            <given-names>Biswajit</given-names>
                        </name>
                        <aff>School of Computer Engineering, Kalinga Institute of Industrial Technology, Bhubaneswar, Odisha, India</aff>
                    </contrib>
                </contrib-group>
                <author-notes>
                    <fn fn-type="conflict">
                        <p>
                            <bold>Competing interests: </bold>There are no competing interests.</p>
                    </fn>
                </author-notes>
                <pub-date pub-type="epub">
                    <day>16</day>
                    <month>1</month>
                    <year>2026</year>
                </pub-date>
            </front-stub>
            <body>
                <p>We would like to thank the reviewer for their detailed and constructive feedback. The comments regarding the quantification of results, the definition of the research gap, and the clarification of our novel contributions were particularly helpful. We have carefully revised the manuscript to address these points. Below, we provide a point-by-point response to each comment.</p>
                <p> 
                    <bold>Comment 1: </bold>
                    <italic>"Improve the quantification of the result."</italic>
                </p>
                <p> We appreciate this suggestion. We have revised the abstract to replace qualitative statements (e.g., "consistently outperforms") with specific, quantified evidence from our experiments. Specifically, we have highlighted the significant performance margin achieved on the most difficult dataset (Thoracic) and the peak performance on the best-performing dataset (Cervical Cancer).</p>
                <p> </p>
                <p> 
                    <bold>Comment 2: </bold>
                    <italic>"Numbering of Equation is preferably done on the right-hand side."</italic>
                </p>
                <p> Thank you for pointing this out. We originally formatted the manuscript with standard right-aligned equation numbering. The current layout (with numbers appearing on a separate line or to the left) was introduced during the journal's typesetting and production process for the published version.</p>
                <p> In the revised version (Version 2), we will explicitly ask the F1000Research production team to correct the alignment so that equation numbers appear on the right-hand side, consistent with standard mathematical formatting.</p>
                <p> </p>
                <p> 
                    <bold>Comment 3: </bold>
                    <italic>"The authors appear to establish a gap using this sentence... However, the sentence could not clearly establish the gap the study is pursuing. Supporting the gap with a citation would present the gap clearly."</italic>
                </p>
                <p> We acknowledge that the original gap analysis was vague. We have rewritten this section to explicitly contrast the limitation of "static" ensemble weights and "surrogate loss functions" (used in standard methods) against our proposed method. We have also added citations to support the claim that standard gradient-based methods cannot directly optimize non-differentiable metrics like AUC.</p>
                <p> </p>
                <p> 
                    <bold>Comment 4: </bold>
                    <italic>"What type of gap has been identified? - methodology, knowledge, etc. In the conclusion, no reference was made to the findings and how it closed the gap."</italic>
                </p>
                <p> We have identified this as a 
                    <bold>Methodological Gap</bold>. The existing methods utilize optimization techniques that require differentiable functions, preventing them from optimizing the AUC directly. We have revised the conclusion to explicitly state this gap type and provided specific reference to our findings (specifically the Thoracic dataset results) to demonstrate how our methodology successfully closed this gap where traditional models failed.</p>
                <p> </p>
                <p> 
                    <bold>Comment 5: </bold>
                    <italic>"In addressing an imbalanced dataset, SMOTE and ADASYN have been used in earlier research, yet this study claims it is novel."</italic>
                </p>
                <p> We agree with the reviewer that SMOTE and ADASYN are well-established techniques. We have clarified in the manuscript that we do not claim the 
                    <italic>use</italic> of SMOTE/ADASYN as the primary novelty. Rather, these are employed as necessary preprocessing steps to ensure the base learners are competent. The novelty lies in the 
                    <bold>ensemble integration framework</bold> that optimizes the combination of these preprocessed learners.</p>
                <p> </p>
                <p> 
                    <bold>Comment 6: </bold>
                    <italic>"The use of Logistic Regression (LR), Random Forest (RF), and XGBoost (XGB) has been adopted in earlier research. The same is DE."</italic>
                </p>
                <p> This is a crucial observation. While the individual components (LR, RF, XGB, DE) are indeed established, the 
                    <bold>architectural integration</bold> proposed in OEDE is novel. Standard stacking ensembles use a meta-learner (like Logistic Regression) that minimizes error via Gradient Descent. Because AUC is non-differentiable, standard stacking cannot optimize AUC directly.</p>
                <p> Our contribution is the 
                    <bold>methodological innovation</bold> of replacing the standard meta-learner with a Differential Evolution optimizer. This allows the ensemble to solve the "weights assignment" problem as a global optimization task, directly maximizing AUC. This specific integration addresses the methodological gap described in Comment 4.</p>
                <p> </p>
                <p> 
                    <bold>Comment 7: </bold>
                    <italic>"Thus, the main contributions of this study as listed in section 1 should be revisited."</italic>
                </p>
                <p> In light of the feedback regarding novelty (Comments 5 and 6), we have rewritten the contributions section to focus on the 
                    <bold>integration strategy</bold> and the 
                    <bold>direct optimization capability</bold>, rather than simply listing the algorithms used.</p>
            </body>
        </sub-article>
    </sub-article>
    <sub-article article-type="reviewer-report" id="report428576">
        <front-stub>
            <article-id pub-id-type="doi">10.5256/f1000research.186799.r428576</article-id>
            <title-group>
                <article-title>Reviewer response for version 1</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author">
                    <name>
                        <surname>Al-shanableh</surname>
                        <given-names>Najah</given-names>
                    </name>
                    <xref ref-type="aff" rid="r428576a1">1</xref>
                    <role>Referee</role>
                    <uri content-type="orcid">https://orcid.org/0000-0001-9877-8782</uri>
                </contrib>
                <aff id="r428576a1">
                    <label>1</label>Al al-Bayt University, Mafraq, Jordan</aff>
            </contrib-group>
            <author-notes>
                <fn fn-type="conflict">
                    <p>
                        <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>22</day>
                <month>11</month>
                <year>2025</year>
            </pub-date>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2025 Al-shanableh N</copyright-statement>
                <copyright-year>2025</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access peer review report distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <related-article ext-link-type="doi" id="relatedArticleReport428576" related-article-type="peer-reviewed-article" xlink:href="10.12688/f1000research.169456.1"/>
            <custom-meta-group>
                <custom-meta>
                    <meta-name>recommendation</meta-name>
                    <meta-value>approve</meta-value>
                </custom-meta>
            </custom-meta-group>
        </front-stub>
        <body>
            <p>The assessment of the submitted work indicates that it meets all key criteria for clarity, rigor, transparency, and reproducibility. The presentation of the work is clear, accurate, and well-structured, with appropriate citations to current and relevant literature. This demonstrates a solid understanding of the field and ensures that the study is well-grounded in existing research.</p>
            <p> The study design is appropriate for the stated objectives, and the technical execution appears sound. The methodological steps and analytical procedures are described in sufficient detail to allow independent replication by other researchers, which strengthens the credibility and scientific value of the work.</p>
            <p> Overall, the work demonstrates strong methodological rigor, clarity of presentation, and adherence to good scientific practices.</p>
            <p>Is the work clearly and accurately presented and does it cite the current literature?</p>
            <p>Yes</p>
            <p>If applicable, is the statistical analysis and its interpretation appropriate?</p>
            <p>Not applicable</p>
            <p>Are all the source data underlying the results available to ensure full reproducibility?</p>
            <p>Yes</p>
            <p>Is the study design appropriate and is the work technically sound?</p>
            <p>Yes</p>
            <p>Are the conclusions drawn adequately supported by the results?</p>
            <p>Yes</p>
            <p>Are sufficient details of methods and analysis provided to allow replication by others?</p>
            <p>Yes</p>
            <p>Reviewer Expertise:</p>
            <p>My research focuses on advancing data-driven solutions at the intersection of artificial intelligence, machine learning, data mining, and healthcare analytics.</p>
            <p>I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard.</p>
        </body>
        <back>
            <ref-list>
                <title>References</title>
                <ref id="rep-ref-428576-1">
                    <label>1</label>
                    <mixed-citation publication-type="journal">
                        <person-group person-group-type="author"/>
                        <article-title>Advanced Ensemble Machine Learning Techniques for Optimizing Diabetes Mellitus Prognostication: A Detailed Examination of Hospital Data</article-title>.
                        <source>
                            <italic>Data and Metadata</italic>
                        </source>.<year>2024</year>;<volume>3</volume>:
                        <elocation-id>10.56294/dm2024.363</elocation-id>
                        <pub-id pub-id-type="doi">10.56294/dm2024.363</pub-id>
                    </mixed-citation>
                </ref>
            </ref-list>
        </back>
    </sub-article>
</article>
