<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.2 20190208//EN" "http://jats.nlm.nih.gov/publishing/1.2/JATS-journalpublishing1.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="1.2" xml:lang="en">
    <front>
        <journal-meta>
            <journal-id journal-id-type="pmc">F1000Research</journal-id>
            <journal-title-group>
                <journal-title>F1000Research</journal-title>
            </journal-title-group>
            <issn pub-type="epub">2046-1402</issn>
            <publisher>
                <publisher-name>F1000 Research Limited</publisher-name>
                <publisher-loc>London, UK</publisher-loc>
            </publisher>
        </journal-meta>
        <article-meta>
            <article-id pub-id-type="doi">10.12688/f1000research.72929.2</article-id>
            <article-categories>
                <subj-group subj-group-type="heading">
                    <subject>Research Article</subject>
                </subj-group>
                <subj-group>
                    <subject>Articles</subject>
                </subj-group>
            </article-categories>
            <title-group>
                <article-title>Utilizing data sampling techniques on algorithmic fairness for customer churn prediction with data imbalance problems</article-title>
                <fn-group content-type="pub-status">
                    <fn>
                        <p>[version 2; peer review: 1 approved, 2 approved with reservations]</p>
                    </fn>
                </fn-group>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Maw</surname>
                        <given-names>Maw</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Data Curation</role>
                    <role content-type="http://credit.niso.org/">Formal Analysis</role>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Original Draft Preparation</role>
                    <xref ref-type="aff" rid="a1">1</xref>
                </contrib>
                <contrib contrib-type="author" corresp="yes">
                    <name>
                        <surname>Haw</surname>
                        <given-names>Su-Cheng</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Funding Acquisition</role>
                    <role content-type="http://credit.niso.org/">Project Administration</role>
                    <role content-type="http://credit.niso.org/">Supervision</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <uri content-type="orcid">https://orcid.org/0000-0002-7190-0837</uri>
                    <xref ref-type="corresp" rid="c1">a</xref>
                    <xref ref-type="aff" rid="a1">1</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Ho</surname>
                        <given-names>Chin-Kuan</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Project Administration</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a1">1</xref>
                </contrib>
                <aff id="a1">
                    <label>1</label>Faculty of Computing and Informatics, Multimedia University, Cyberjaya, Selangor, 63100, Malaysia</aff>
            </contrib-group>
            <author-notes>
                <corresp id="c1">
                    <label>a</label>
                    <email xlink:href="mailto:sucheng@mmu.edu.my">sucheng@mmu.edu.my</email>
                </corresp>
                <fn fn-type="conflict">
                    <p>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>27</day>
                <month>6</month>
                <year>2022</year>
            </pub-date>
            <pub-date pub-type="collection">
                <year>2021</year>
            </pub-date>
            <volume>10</volume>
            <elocation-id>988</elocation-id>
            <history>
                <date date-type="accepted">
                    <day>7</day>
                    <month>6</month>
                    <year>2022</year>
                </date>
            </history>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2022 Maw M et al.</copyright-statement>
                <copyright-year>2022</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access article distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <self-uri content-type="pdf" xlink:href="https://f1000research.com/articles/10-988/pdf"/>
            <abstract>
                <p>
                    <bold>Background:</bold> Customer churn prediction (CCP) refers to detecting which customers are likely to cancel the services provided by a service provider, for example, internet services. The class imbalance problem (CIP) in machine learning occurs when there is a huge difference in the samples of positive class compared to the negative class. It is one of the major obstacles in CCP as it deteriorates performance in the classification process. Utilizing data sampling techniques (DSTs) helps to resolve the CIP to some extent.</p>
                <p>
                    <bold>Methods:</bold> In this paper, we review the effect of using DSTs on algorithmic fairness, i.e., we investigate whether the results show any discrimination between male and female groups and compare the results before and after using DSTs. Three real-world datasets with unequal balancing rates were prepared and four ubiquitous DSTs were applied to them. Six popular classification techniques were utilized in the classification process. Both the classifiers&#x2019; performance and algorithmic fairness were evaluated with notable metrics.</p>
                <p>
                    <bold>Results:</bold> The results indicated that the Random Forest classifier outperforms the other classifiers in all three datasets and that using the SMOTE and ADASYN techniques causes more discrimination in the female group. The rate of unintentional discrimination seems to be higher in the original data of extremely unbalanced datasets under the following classifiers: Logistic Regression, LightGBM, and XGBoost.</p>
                <p>
                    <bold>Conclusions:</bold> Algorithmic fairness has become a broadly studied area in recent years, yet there is very little systematic study on the effect of using DSTs on algorithmic fairness. This study presents important findings to further the use of algorithmic fairness in CCP research.</p>
            </abstract>
            <kwd-group kwd-group-type="author">
                <kwd>Customer churn prediction</kwd>
                <kwd>Data sampling techniques</kwd>
                <kwd>Algorithmic fairness</kwd>
                <kwd>Class imbalance problem</kwd>
            </kwd-group>
            <funding-group>
                <award-group id="fund-1">
                    <funding-source>Telekom Research &amp; Development</funding-source>
                    <award-id>MMUE/160013</award-id>
                </award-group>
                <funding-statement>This work is supported by the funding of TM Research &amp; Development from Telekom Malaysia, Malaysia (Ref: MMUE/160013).</funding-statement>
                <funding-statement>
                    <italic>The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript.</italic>
                </funding-statement>
            </funding-group>
        </article-meta>
        <notes>
            <sec sec-type="version-changes">
                <label>Revised</label>
                <title>Amendments from Version 1</title>
                <p>In the revised version of the paper, we updated the introduction section with the applications of algorithmic fairness in two significant areas: recommender systems and customer churn prediction. In addition, we also define the scope of the work. Three versions of data are prepared as extremely imbalanced (i.e., 5% churn and 95% non-churn rate), very imbalanced (i.e., 15% churn and 85% non-churn rate) and normal imbalanced (i.e., 30% churn and 70% non-churn rate). The classification results are demonstrated with the learning curves of the Random Forest classifier to provide a clear view of the model&#x2019;s performance on these datasets and are discussed briefly in the discussion section. We added a compact discussion of the major limitations of the study in the conclusion section. We believe this revised version of the paper will provide readers with insightful knowledge about the need to consider algorithmic fairness in machine-based decision tasks, especially in rapidly growing e-commerce businesses.</p>
            </sec>
        </notes>
    </front>
    <body>
        <sec id="sec1" sec-type="intro">
            <title>Introduction</title>
            <p>Customer churn, the phenomenon in which customers are shifting to rival companies due to dissatisfaction with the existing services or for other inevitable reasons,
                <xref ref-type="bibr" rid="ref1">
                    <sup>1</sup>
                </xref> is one of the common issues encountered in every customer-oriented sector, including telecommunication. Customer churn prediction (CCP) is a supervised binary classification procedure that detects potential churners before they churn. Since there are no standardized principles for collecting data for CCP tasks, the data distribution between classes varies from one data set to another. Therefore, one class might be extremely underrepresented compared to another class. In CCP, the target class indicates whether a customer has churned or not. To be exact, churn is always the minority class, whereas the non-churn class usually comes in large numbers. Therefore, churn is usually considered a rare object
                <xref ref-type="bibr" rid="ref2">
                    <sup>2</sup>
                </xref> in service-based domains including telecom. Thus, telecom datasets always suffer from a class imbalance problem (CIP) and lead to a situation in which minority instances remain unlearned.</p>
            <p>Advanced machine learning techniques can be applied to predict potential churners. Let us consider a dataset with 10,000 data instances with 10% churn samples, i.e., 1,000 churners and 9,000 non-churners. Even if a carefully built model could predict 90% correctly on the minority class, it means 100 customers are misclassified. Suppose that 60 churners are misclassified as non-churners, i.e., false negatives; the company will then lose a huge amount of revenue since recruiting new customers is more expensive than keeping the existing ones.
                <xref ref-type="bibr" rid="ref3">
                    <sup>3</sup>
                </xref> Thus, the ultimate goal in the telecom sector is to increase profit by decreasing customer churn. Hence, CIP is a block when trying to achieve the major goal of CCP, since it degrades classification accuracy. Algorithmic fairness has become a very active research topic since ProPublica observed that the algorithms could yield discriminative outcomes, which impacted a minority group in real life.
                <xref ref-type="bibr" rid="ref4">
                    <sup>4</sup>
                </xref>
            </p>
            <p>Algorithmic fairness is monitored in line with the protected features or sensitive variables in the dataset. Sensitive data could be, but is not limited to, gender, race, age group, or religion. Algorithmic fairness is achieved if the decisions generated by a model do not favor or disfavor any individual or group.
                <xref ref-type="bibr" rid="ref5">
                    <sup>5</sup>
                </xref> The less bias there is in the training data, the greater the chance of achieving algorithmic fairness. However, it is almost impossible to train a zero-bias model since the historical data could contain bias for many reasons.
                <xref ref-type="bibr" rid="ref6">
                    <sup>6</sup>
                </xref> The common reasons for bias in the training data involve the compounding of initial bias over time, using proxy variables, and unbalancing of sample size between minority and majority groups.
                <xref ref-type="bibr" rid="ref7">
                    <sup>7</sup>
                </xref>
            </p>
            <p>In the CCP process, customers&#x2019; behavior is analyzed within specific time windows, for example within one month.
                <xref ref-type="bibr" rid="ref8">
                    <sup>8</sup>
                </xref> Once the prediction is done, the outcomes are reused as training data for the next prediction. Therefore, there is a high chance of bias being repeated in the historical data without anyone noticing. This explains why the algorithmic fairness issue should be considered when building CCP models. Since CIP is a major concern in the CCP process, one solution for CIP is to apply data sampling techniques (DSTs) to the training data. Since the major function of DSTs is to increase or decrease the number of sample instances to balance the majority and minority classes, they change the number of samples in the different groups in the dataset. Even though there is a high chance that using DSTs adversely affects algorithmic fairness, research on that specific topic is scarce. This motivates us to evaluate the impact of DSTs on algorithmic fairness in the CCP process.</p>
            <p>In e-commerce businesses, the role of artificial intelligence has become very important, and it plays a major part in running the businesses more smoothly. Moreover, with the challenges of Covid-19, customer-oriented businesses have rapidly moved to online platforms, which rely heavily on machine learning models to provide precise personalized experiences to customers. Thus, preventing customer churn and enhancing recommendation systems are hot topics in e-commerce research. Despite the research demand, there are very few works that are closely related to our study. Some of the prominent studies are summarized in 
                <xref ref-type="table" rid="T1">Table 1</xref>.</p>
            <table-wrap id="T1" orientation="portrait" position="float">
                <label>Table 1. </label>
                <caption>
                    <title>Description of related studies.</title>
                </caption>
                <table content-type="article-table" frame="hsides">
                    <thead>
                        <tr>
                            <th align="left" colspan="1" rowspan="1" valign="top">Research works</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Description</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Reference</th>
                        </tr>
                    </thead>
                    <tbody>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Gui, Chun (2017)</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">In the article, the authors experimented with three kinds of feature selection techniques (Random Forest (RF), Relative Weight, and Standardized regression coefficients), three types of DSTs, and the RF classification algorithm on a real-world dataset with 45,000 records to analyze the imbalanced dataset problem in CCP.</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <xref ref-type="bibr" rid="ref3">3</xref>
                            </td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Mehrotra et al. (2018)</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">This research work proposed a conceptual and computational framework and several policies for optimizing the relevance of the recommendation systems.</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <xref ref-type="bibr" rid="ref9">9</xref>
                            </td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Valentim et al. (2019)</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">The authors evaluated the impact of different data preparation techniques, mainly for sensitive data removal, encoding of categorical data, and data selection techniques, by testing on two well-known public datasets. However, their work was not focused on the DSTs&#x2019; impact on algorithmic fairness, and they tested only on very small public datasets.</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <xref ref-type="bibr" rid="ref10">10</xref>
                            </td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Beutel et al. (2019)</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">The study introduced a new set of fairness evaluation metrics for the recommender systems by using pairwise regularization.</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <xref ref-type="bibr" rid="ref11">11</xref>
                            </td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Patro et al. (2020)</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">The authors investigated the fair recommendation in the context of two-sided online platforms and proposed a novel mapping called FairRec which provides the effectiveness in incurring a marginal loss in the recommendation process.</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <xref ref-type="bibr" rid="ref12">12</xref>
                            </td>
                        </tr>
                    </tbody>
                </table>
            </table-wrap>
            <p>In this paper, we evaluate how the DSTs applied in the data preparation stage of the CCP process impact the algorithmic fairness by testing on three versions of real-world Telecom customer churn datasets. Therefore, the main goal of this study is to explore and identify the impact of using DSTs on training data on algorithmic fairness in the CCP process. To the best of our knowledge, there is very little research concerning algorithmic fairness in the CCP process. We believe the findings of this study would provide valuable insights into future CCP research.</p>
            <p>In the following, we elaborate on the methodologies applied in the experimental procedure. We list all of the classification results, learning curve results, and performance measures for fairness in the next section. The significant results are specifically discussed in the discussion section. After that, we discuss the challenges and opportunities gained from this study. We conclude the article by discussing the limitations and future works of the study.</p>
        </sec>
        <sec id="sec2" sec-type="methods">
            <title>Methods</title>
            <p>Ethical Approval Number: EA1742021</p>
            <p>Ethical Approval Body: Research Ethics Committee 2021, Multimedia University</p>
            <p>In this study, the original data set is prepared to make three versions of unbalanced datasets, with rates of 5% (i.e., 95% of non-churn data and 5% churn data), 15%, and 30%. Four DSTs are applied to each version, and the results are compared with those of the unsampled original dataset to evaluate the classification performance and the impact on algorithmic fairness. The step-by-step methods used to conduct the study are presented in 
                <xref ref-type="fig" rid="f1">Figure 1</xref>.</p>
            <fig fig-type="figure" id="f1" orientation="portrait" position="float">
                <label>Figure 1. </label>
                <caption>
                    <title>Procedures of the study.</title>
                </caption>
                <graphic id="gr1" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/134711/6b017cdd-3923-4ae7-95ae-bb282ba864fc_figure1.gif"/>
            </fig>
            <sec id="sec3">
                <title>Datasets</title>
                <p>A real-world telecom dataset was provided by one of Malaysia&#x2019;s leading telecom companies (see 
                    <italic toggle="yes">Underlying data</italic> for details on access to this dataset). The original dataset contains 1,265,535 customer records, which were collected from January 2011 to December 2011. Since the original data set is huge in volume, we randomly selected 100,000 records and utilized them for this study as we prefer to start with a smaller sample of data due to the limited resource allocation. We included demographics, call information, network usage, billing information, and customer satisfaction data in our dataset since they are considered influential factors in the CCP process.
                    <xref ref-type="bibr" rid="ref13">
                        <sup>13</sup>
                    </xref>
                    <sup>,</sup>
                    <xref ref-type="bibr" rid="ref14">
                        <sup>14</sup>
                    </xref> A total of 22 features were extracted after careful aggregation, i.e., new features were created based on the original data and some unnecessary features were deleted from it. The features are listed in 
                    <xref ref-type="table" rid="T2">Table 2</xref>.</p>
                <table-wrap id="T2" orientation="portrait" position="float">
                    <label>Table 2. </label>
                    <caption>
                        <title>Features used in the real-world dataset.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">No.</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Name of the features</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Description</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">1</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Customer ID</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Customer ID</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">2</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Age</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Age of customer</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">3</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Is senior</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Is the customer over 60 or not</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">4</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Gender</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Gender of customer</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">5</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Is local</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Is the customer a Malaysian or an international?</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">6</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Race</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Is the customer Malay or Indian or Chinese or Other?</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">7</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Technical-problem-count</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Total technical complaints and general complaints made by a customer</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">8</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Complain-count</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Total general complaints made by a customer</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">9</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Avrg download</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Average download rate</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">10</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Avrg upload</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Average upload rate</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">11</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">T-Location</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">The location where the customer registered for the service</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">12</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">HSBB area</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Is the customer in the area where a high-speed connection is required or not</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">13</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Speed</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">The broadband speed customer has registered for</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">14</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Price start</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">The value of the package the customer has bought</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">15</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Contract period</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">The contract period of the customer</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">16</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Median- outstanding</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Average overdue fees</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">17</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Avrg local amt</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Average amount spent on calling locally</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">18</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Avrg std amt</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Average amount spent on subscriber trunk dialing</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">19</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Avrg idd amt</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Average amount spent on international calls</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">20</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Avrg voice usage</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Average amount spent on voice calls</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">21</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Avrg dialup amt</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Average amount spent on dialup service</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">22</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Churn</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Whether the customer is churned or not</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
                <p>The final dataset was prepared with three different rates of unbalancing: 5%, 15%, and 30%. We created a Python script (see 
                    <italic toggle="yes">Extended data</italic>) which used the Pandas and Scikit-learn libraries to prepare three versions of the dataset. We set up these specific rates because we wanted to experiment with cases ranging from extremely unbalanced to moderately unbalanced.</p>
            </sec>
            <sec id="sec4">
                <title>Data preprocessing</title>
                <p>In the data preprocessing stage, we excluded any null values. Since we found only a few outliers in the selected dataset, we manually removed them without using any specific procedure. We applied four DSTs to the data: Random Over Sampler (ROS), Random Under Sampler (RUS),
                    <xref ref-type="bibr" rid="ref15">
                        <sup>15</sup>
                    </xref> Synthetic Minority Oversampling Technique (SMOTE),
                    <xref ref-type="bibr" rid="ref16">
                        <sup>16</sup>
                    </xref> and Adaptive Synthetic Oversampling Technique (ADASYN).
                    <xref ref-type="bibr" rid="ref17">
                        <sup>17</sup>
                    </xref> The DSTs were selected based on their popularity and in order to assess the impact of each of them on algorithmic fairness in the CCP process.</p>
            </sec>
            <sec id="sec5">
                <title>Classification of data</title>
                <p>We applied six popular classifiers: Random Forest (RF), Decision Tree (DT), LightGBM (LGBM), Gradient Boosting (GB), Logistic Regression (LR), and XGBoost.
                    <xref ref-type="bibr" rid="ref18">
                        <sup>18</sup>
                    </xref> We created our own Python script (see 
                    <italic toggle="yes">Extended data</italic>) using the Scikit-learn machine learning library to perform this step. After a careful exploratory data analysis, we dropped Customer ID, Avrg local amt, Avrg std amt, Avrg idd amt, and Avrg dialup amt from the predictor variable list since they were weakly correlated with the target variable.</p>
            </sec>
            <sec id="sec6">
                <title>Evaluation of experiment</title>
                <p>We performed two evaluations: performance measures
                    <xref ref-type="bibr" rid="ref19">
                        <sup>19</sup>
                    </xref> and algorithmic fairness metrics.
                    <xref ref-type="bibr" rid="ref20">
                        <sup>20</sup>
                    </xref>
                </p>
                <p>
                    <bold>
                        <italic toggle="yes">Performance measures</italic>
                    </bold>
                </p>
                <p>In measuring the classifier's performance, we applied standard measures which are commonly used in most machine learning classification tasks, including precision, recall and accuracy. We applied F-1 and AUC-ROC scores since accuracy alone is not enough to evaluate the actual performance of the classifiers. However, the performances of the different classifiers are compared by using AUC-ROC score. We created our own script (see 
                    <italic toggle="yes">Extended data</italic>) using Scikit-learn, a free machine learning software library for the Python programming language. The performance of each classifier was calculated as follows:
                    <disp-formula id="e1">
                        <mml:math display="block">
                            <mml:mtext>Accuracy</mml:mtext>
                            <mml:mo>=</mml:mo>
                            <mml:mfrac>
                                <mml:mrow>
                                    <mml:mi mathvariant="italic">TP</mml:mi>
                                    <mml:mo>+</mml:mo>
                                    <mml:mi mathvariant="italic">TN</mml:mi>
                                </mml:mrow>
                                <mml:mrow>
                                    <mml:mi mathvariant="italic">TP</mml:mi>
                                    <mml:mo>+</mml:mo>
                                    <mml:mi mathvariant="italic">TN</mml:mi>
                                    <mml:mo>+</mml:mo>
                                    <mml:mi mathvariant="italic">FP</mml:mi>
                                    <mml:mo>+</mml:mo>
                                    <mml:mi mathvariant="italic">FN</mml:mi>
                                    <mml:mspace width="0.25em"/>
                                </mml:mrow>
                            </mml:mfrac>
                            <mml:mo>,</mml:mo>
                        </mml:math>
                    </disp-formula>
                </p>
                <p>where
                    <disp-formula id="e2">
                        <mml:math display="block">
                            <mml:mi>TP</mml:mi>
                            <mml:mo>=</mml:mo>
                            <mml:mtext>true positive</mml:mtext>
                        </mml:math>
                    </disp-formula>
                    <disp-formula id="e3">
                        <mml:math display="block">
                            <mml:mi>TN</mml:mi>
                            <mml:mo>=</mml:mo>
                            <mml:mtext>true negative</mml:mtext>
                        </mml:math>
                    </disp-formula>
                    <disp-formula id="e4">
                        <mml:math display="block">
                            <mml:mspace width="0.25em"/>
                            <mml:mi>FP</mml:mi>
                            <mml:mo>=</mml:mo>
                            <mml:mtext>false positive</mml:mtext>
                        </mml:math>
                    </disp-formula>
                    <disp-formula id="e5">
                        <mml:math display="block">
                            <mml:mi>FN</mml:mi>
                            <mml:mo>=</mml:mo>
                            <mml:mtext>false negative</mml:mtext>
                        </mml:math>
                    </disp-formula>
                    <disp-formula id="e6">
                        <mml:math display="block">
                            <mml:mtext>Precision</mml:mtext>
                            <mml:mo>=</mml:mo>
                            <mml:mfrac>
                                <mml:mtext>True positive</mml:mtext>
                                <mml:mrow>
                                    <mml:mtext>True positive</mml:mtext>
                                    <mml:mo>+</mml:mo>
                                    <mml:mtext>False positive</mml:mtext>
                                </mml:mrow>
                            </mml:mfrac>
                        </mml:math>
                    </disp-formula>
                    <disp-formula id="e7">
                        <mml:math display="block">
                            <mml:mtext>Recall</mml:mtext>
                            <mml:mo>=</mml:mo>
                            <mml:mfrac>
                                <mml:mtext>True positive</mml:mtext>
                                <mml:mrow>
                                    <mml:mtext>True positive</mml:mtext>
                                    <mml:mo>+</mml:mo>
                                    <mml:mtext>False negative</mml:mtext>
                                </mml:mrow>
                            </mml:mfrac>
                        </mml:math>
                    </disp-formula>
                    <disp-formula id="e8">
                        <mml:math display="block">
                            <mml:mi mathvariant="normal">F</mml:mi>
                            <mml:mn>1</mml:mn>
                            <mml:mo>&#x2212;</mml:mo>
                            <mml:mtext>Score</mml:mtext>
                            <mml:mo>=</mml:mo>
                            <mml:mn>2</mml:mn>
                            <mml:mo>&#x2217;</mml:mo>
                            <mml:mfrac>
                                <mml:mrow>
                                    <mml:mtext>Precision</mml:mtext>
                                    <mml:mo>&#x2217;</mml:mo>
                                    <mml:mtext>Recall</mml:mtext>
                                </mml:mrow>
                                <mml:mrow>
                                    <mml:mtext>Precision</mml:mtext>
                                    <mml:mo>+</mml:mo>
                                    <mml:mtext>Recall</mml:mtext>
                                </mml:mrow>
                            </mml:mfrac>
                        </mml:math>
                    </disp-formula>
                    <disp-formula id="e9">
                        <mml:math display="block">
                            <mml:mi>AUC</mml:mi>
                            <mml:mo>&#x2212;</mml:mo>
                            <mml:mi>ROC</mml:mi>
                            <mml:mo>=</mml:mo>
                            <mml:mfrac>
                                <mml:mrow>
                                    <mml:mo>&#x2211;</mml:mo>
                                    <mml:mtext>Rank</mml:mtext>
                                    <mml:mspace width="0.25em"/>
                                    <mml:mfenced>
                                        <mml:mo>+</mml:mo>
                                    </mml:mfenced>
                                    <mml:mo>&#x2212;</mml:mo>
                                    <mml:mfenced close="|" open="|">
                                        <mml:mo>+</mml:mo>
                                    </mml:mfenced>
                                    <mml:mo>&#x2217;</mml:mo>
                                    <mml:mfenced>
                                        <mml:mrow>
                                            <mml:mfenced close="|" open="|">
                                                <mml:mo>+</mml:mo>
                                            </mml:mfenced>
                                            <mml:mo>+</mml:mo>
                                            <mml:mn>1</mml:mn>
                                        </mml:mrow>
                                    </mml:mfenced>
                                    <mml:mo>/</mml:mo>
                                    <mml:mn>2</mml:mn>
                                </mml:mrow>
                                <mml:mrow>
                                    <mml:mfenced close="|" open="|">
                                        <mml:mo>+</mml:mo>
                                    </mml:mfenced>
                                    <mml:mo>&#x2217;</mml:mo>
                                    <mml:mfenced close="|" open="|">
                                        <mml:mo>&#x2212;</mml:mo>
                                    </mml:mfenced>
                                </mml:mrow>
                            </mml:mfrac>
                            <mml:mspace width="1em"/>
                            <mml:mtext>where</mml:mtext>
                            <mml:mspace linebreak="newline"/>
                            <mml:mo>&#x2211;</mml:mo>
                            <mml:mtext>Rank</mml:mtext>
                            <mml:mspace width="0.25em"/>
                            <mml:mfenced>
                                <mml:mo>+</mml:mo>
                            </mml:mfenced>
                            <mml:mspace width="0.25em"/>
                            <mml:mtext>is the sum of the ranks of all positive classified examples</mml:mtext>
                            <mml:mspace linebreak="newline"/>
                            <mml:mfenced close="|" open="|">
                                <mml:mo>+</mml:mo>
                            </mml:mfenced>
                            <mml:mspace width="0.25em"/>
                            <mml:mtext>is the number of positive examples in the dataset</mml:mtext>
                            <mml:mspace linebreak="newline"/>
                            <mml:mfenced close="|" open="|">
                                <mml:mo>&#x2212;</mml:mo>
                            </mml:mfenced>
                            <mml:mspace width="0.25em"/>
                            <mml:mtext>is the number of negative examples in the dataset</mml:mtext>
                    </disp-formula>
                </p>
                <p>
                    <bold>
                        <italic toggle="yes">Algorithmic fairness metrics</italic>
                    </bold>
                </p>
                <p>We focused on assessing whether the classifier discriminates between women, a protected group, and men, a non-protected group. We applied two well-known fairness definitions in measuring algorithmic fairness and utilized the popular AI Fairness 360 tool to calculate algorithmic fairness.
                    <xref ref-type="bibr" rid="ref20">
                        <sup>20</sup>
                    </xref>
                </p>
                <p>
                    <italic toggle="yes">Statistical parity (SP)</italic>: Also known as equal acceptance rate. SP is achieved if women have the same probability as men of being predicted in the positive (i.e., churn) class.
                    <xref ref-type="bibr" rid="ref21">
                        <sup>21</sup>
                    </xref>
                </p>
                <p>SP difference measures the difference in a specific outcome between the protected group (female) and the non-protected group (male). The smaller the SP difference between the two groups, the more statistically similar the model's treatment of the non-protected and protected groups.</p>
                <p>SP is calculated as follows:
                    <disp-formula id="e10">
                        <mml:math display="block">
                            <mml:mo>Pr</mml:mo>
                            <mml:mspace width="0.25em"/>
                            <mml:mfenced close="|" open="(">
                                <mml:mrow>
                                    <mml:mi>Y</mml:mi>
                                    <mml:mo>=</mml:mo>
                                    <mml:mn>1</mml:mn>
                                </mml:mrow>
                            </mml:mfenced>
                            <mml:mspace width="0.25em"/>
                            <mml:mtext>Group</mml:mtext>
                            <mml:mo>=</mml:mo>
                            <mml:mtext>male</mml:mtext>
                            <mml:mo stretchy="true">)</mml:mo>
                            <mml:mo>=</mml:mo>
                            <mml:mo>Pr</mml:mo>
                            <mml:mspace width="0.25em"/>
                            <mml:mfenced close="|" open="(">
                                <mml:mrow>
                                    <mml:mi>Y</mml:mi>
                                    <mml:mo>=</mml:mo>
                                    <mml:mn>1</mml:mn>
                                </mml:mrow>
                            </mml:mfenced>
                            <mml:mspace width="0.25em"/>
                            <mml:mtext>Group</mml:mtext>
                            <mml:mo>=</mml:mo>
                            <mml:mtext>female</mml:mtext>
                            <mml:mo stretchy="true">)</mml:mo>
                            <mml:mo>,</mml:mo>
                            <mml:mspace width="0.55em"/>
                            <mml:mtext>where</mml:mtext>
                            <mml:mspace width="0.25em"/>
                            <mml:mi>Y</mml:mi>
                            <mml:mo>=</mml:mo>
                            <mml:mtext>predicted decision</mml:mtext>
                        </mml:math>
                    </disp-formula>
                </p>
                <p>
                    <italic toggle="yes">Disparate Impact (DI)</italic>: Also known as indirect discrimination where no protected variables are directly applied, but biased outcomes are still produced relying on the variables correlated with protected variables.
                    <xref ref-type="bibr" rid="ref22">
                        <sup>22</sup>
                    </xref> The standardized threshold for the calculation of DI is 0.8, which means that a group whose DI value is below 0.8 is discriminated against by the classifier.</p>
                <p>The threshold value 80% is advised by the US Equal Employment Opportunity Commission.
                    <xref ref-type="bibr" rid="ref23">
                        <sup>23</sup>
                    </xref> The model can be considered DI-free when the value is larger than 80% but lower than 125%.
                    <xref ref-type="bibr" rid="ref24">
                        <sup>24</sup>
                    </xref>
                </p>
                <p>DI is calculated as follows:
                    <disp-formula id="e11">
                        <mml:math display="block">
                            <mml:mi mathvariant="italic">DI</mml:mi>
                            <mml:mo>=</mml:mo>
                            <mml:mfrac>
                                <mml:mrow>
                                    <mml:mo>Pr</mml:mo>
                                    <mml:mfenced close="|" open="(">
                                        <mml:mrow>
                                            <mml:mi>Y</mml:mi>
                                            <mml:mo>=</mml:mo>
                                            <mml:mn>1</mml:mn>
                                            <mml:mspace width="0.25em"/>
                                        </mml:mrow>
                                    </mml:mfenced>
                                    <mml:mtext>Group</mml:mtext>
                                    <mml:mo>=</mml:mo>
                                    <mml:mtext>female</mml:mtext>
                                    <mml:mo stretchy="true">)</mml:mo>
                                </mml:mrow>
                                <mml:mrow>
                                    <mml:mo>Pr</mml:mo>
                                    <mml:mfenced close="|" open="(">
                                        <mml:mrow>
                                            <mml:mi>Y</mml:mi>
                                            <mml:mo>=</mml:mo>
                                            <mml:mn>1</mml:mn>
                                        </mml:mrow>
                                    </mml:mfenced>
                                    <mml:mtext>Group</mml:mtext>
                                    <mml:mo>=</mml:mo>
                                    <mml:mtext>male</mml:mtext>
                                    <mml:mo stretchy="true">)</mml:mo>
                                </mml:mrow>
                            </mml:mfrac>
                            <mml:mspace width="0.55em"/>
                            <mml:mo>&#x2264;</mml:mo>
                            <mml:mspace width="0.55em"/>
                            <mml:mi>&#x03c4;</mml:mi>
                            <mml:mo>=</mml:mo>
                            <mml:mn>0.8</mml:mn>
                            <mml:mo>,</mml:mo>
                        </mml:math>
                    </disp-formula>
                </p>
                <p>where 
                    <italic toggle="yes">Y</italic> = predicted decision</p>
            </sec>
        </sec>
        <sec id="sec7" sec-type="results">
            <title>Results</title>
            <p>The preliminary classification results for the datasets with different data unbalanced rates using four DSTs are shown in 
                <xref ref-type="table" rid="T3">Tables 3</xref>&#x2013;
                <xref ref-type="table" rid="T5">5</xref>. 
                <xref ref-type="table" rid="T3">Table 3</xref> shows the classification performance obtained on the dataset with a 5% unbalanced rate for the chosen classifiers and the four DSTs.</p>
            <table-wrap id="T3" orientation="portrait" position="float">
                <label>Table 3. </label>
                <caption>
                    <title>The classification results for the dataset with 5% unbalanced rate.</title>
                </caption>
                <table content-type="article-table" frame="hsides">
                    <thead>
                        <tr>
                            <th align="left" colspan="1" rowspan="2" valign="top">Classifier</th>
                            <th align="left" colspan="5" rowspan="1" valign="top">5% imbalanced with ROS</th>
                            <th align="left" colspan="5" rowspan="1" valign="top">5% imbalanced with RUS</th>
                        </tr>
                        <tr>
                            <th align="left" colspan="1" rowspan="1" valign="top">Accuracy</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Precision</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Recall</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">F1-score</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">AUC-ROC</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Accuracy</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Precision</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Recall</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">F1-score</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">AUC-ROC</th>
                        </tr>
                    </thead>
                    <tbody>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">RF</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.99</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.99</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.99</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.99</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.99</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.84</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.84</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.84</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.84</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.93</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">DT</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.98</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.98</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.98</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.98</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.97</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.80</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.80</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.80</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.80</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.79</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">LGBM</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.89</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.90</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.89</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.89</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.96</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.84</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.84</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.84</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.84</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.93</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">GB</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.85</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.85</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.85</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.85</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.99</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.85</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.85</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.85</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.85</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.93</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">LR</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.78</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.81</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.78</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.78</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.87</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.80</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.83</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.80</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.81</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.88</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">XGBoost</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.93</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.93</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.93</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.93</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.97</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.83</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.83</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.83</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.83</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.92</td>
                        </tr>
                    </tbody>
                </table>
                <table content-type="article-table" frame="hsides">
                    <thead>
                        <tr>
                            <th align="left" colspan="5" rowspan="1" valign="top">5% imbalanced with SMOTE</th>
                            <th align="left" colspan="5" rowspan="1" valign="top">5% imbalanced with ADASYN</th>
                        </tr>
                        <tr>
                            <th align="left" colspan="1" rowspan="1" valign="top">Accuracy</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Precision</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Recall</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">F1-score</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">AUC-ROC</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Accuracy</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Precision</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Recall</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">F1-score</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">AUC-ROC</th>
                        </tr>
                    </thead>
                    <tbody>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.98</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.98</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.98</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.98</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.99</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.98</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.98</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.98</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.98</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.99</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.96</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.96</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.96</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.96</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.96</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.96</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.96</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.96</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.96</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.96</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.98</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.98</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.98</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.98</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.99</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.98</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.98</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.98</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.98</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.99</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.96</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.96</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.96</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.96</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.99</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.96</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.96</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.96</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.96</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.99</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.79</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.81</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.79</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.79</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.87</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.77</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.81</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.77</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.77</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.84</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.98</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.98</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.98</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.98</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.99</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.98</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.98</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.98</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.98</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.99</td>
                        </tr>
                    </tbody>
                </table>
            </table-wrap>
            <table-wrap id="T4" orientation="portrait" position="float">
                <label>Table 4. </label>
                <caption>
                    <title>The classification results for the dataset with 15% unbalanced rate.</title>
                </caption>
                <table content-type="article-table" frame="hsides">
                    <thead>
                        <tr>
                            <th align="left" colspan="1" rowspan="2" valign="top">Classifier</th>
                            <th align="left" colspan="5" rowspan="1" valign="top">15% imbalanced with ROS</th>
                            <th align="left" colspan="5" rowspan="1" valign="top">15% imbalanced with RUS</th>
                        </tr>
                        <tr>
                            <th align="left" colspan="1" rowspan="1" valign="top">Accuracy</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Precision</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Recall</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">F1-score</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">AUC-ROC</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Accuracy</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Precision</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Recall</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">F1-score</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">AUC-ROC</th>
                        </tr>
                    </thead>
                    <tbody>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">RF</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.97</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.97</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.97</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.97</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.99</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.76</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.75</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.76</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.76</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.83</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">DT</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.93</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.93</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.93</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.93</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.93</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.68</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.68</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.68</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.68</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.68</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">LGBM</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.78</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.78</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.78</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.78</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.87</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.76</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.77</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.76</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.76</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.84</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">GB</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.76</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.77</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.76</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.76</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.84</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.76</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.76</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.76</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.76</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.84</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">LG</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.64</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.64</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.64</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.64</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.69</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.64</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.64</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.64</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.64</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.70</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">XGBoost</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.81</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.81</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.81</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.81</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.90</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.75</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.76</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.75</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.76</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.84</td>
                        </tr>
                    </tbody>
                </table>
                <table content-type="article-table" frame="hsides">
                    <thead>
                        <tr>
                            <th align="left" colspan="5" rowspan="1" valign="top">15% imbalanced with SMOTE</th>
                            <th align="left" colspan="5" rowspan="1" valign="top">15% imbalanced with ADASYN</th>
                        </tr>
                        <tr>
                            <th align="left" colspan="1" rowspan="1" valign="top">Accuracy</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Precision</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Recall</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">F1-score</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">AUC-ROC</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Accuracy</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Precision</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Recall</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">F1-score</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">AUC-ROC</th>
                        </tr>
                    </thead>
                    <tbody>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.93</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.94</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.93</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.93</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.97</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.93</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.93</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.93</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.93</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.97</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.89</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.89</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.89</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.89</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.89</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.88</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.89</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.88</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.89</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.88</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.94</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.94</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.94</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.94</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.97</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.94</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.94</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.94</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.94</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.97</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.91</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.92</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.91</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.91</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.96</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.91</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.92</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.91</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.91</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.96</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.64</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.64</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.64</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.64</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.70</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.59</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.59</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.59</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.59</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.63</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.94</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.76</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.94</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.94</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.97</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.94</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.94</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.94</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.94</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.97</td>
                        </tr>
                    </tbody>
                </table>
            </table-wrap>
            <table-wrap id="T5" orientation="portrait" position="float">
                <label>Table 5. </label>
                <caption>
                    <title>The classification results for the dataset with 30% unbalanced rate.</title>
                </caption>
                <table content-type="article-table" frame="hsides">
                    <thead>
                        <tr>
                            <th align="left" colspan="1" rowspan="2" valign="top">Classifier</th>
                            <th align="left" colspan="5" rowspan="1" valign="top">30% imbalanced with ROS</th>
                            <th align="left" colspan="5" rowspan="1" valign="top">30% imbalanced with RUS</th>
                        </tr>
                        <tr>
                            <th align="left" colspan="1" rowspan="1" valign="top">Accuracy</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Precision</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Recall</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">F1-score</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">AUC-ROC</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Accuracy</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Precision</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Recall</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">F1-score</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">AUC-ROC</th>
                        </tr>
                    </thead>
                    <tbody>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">RF</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.89</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.89</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.89</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.89</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.95</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.74</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.74</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.74</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.74</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.82</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">DT</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.83</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.83</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.83</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.83</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.83</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.66</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.66</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.66</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.66</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.66</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">LGBM</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.76</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.76</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.76</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.76</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.84</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.75</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.76</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.75</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.75</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.83</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">GB</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.74</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.75</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.74</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.74</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.82</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.75</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.76</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.75</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.75</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.82</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">LG</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.66</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.67</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.66</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.66</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.68</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.63</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.64</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.63</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.63</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.68</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">XGBoost</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.77</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.77</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.77</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.77</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.85</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.75</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.75</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.75</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.75</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.83</td>
                        </tr>
                    </tbody>
                </table>
                <table content-type="article-table" frame="hsides">
                    <thead>
                        <tr>
                            <th align="left" colspan="5" rowspan="1" valign="top">30% imbalanced with SMOTE</th>
                            <th align="left" colspan="5" rowspan="1" valign="top">30% imbalanced with ADASYN</th>
                        </tr>
                        <tr>
                            <th align="left" colspan="1" rowspan="1" valign="top">Accuracy</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Precision</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Recall</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">F1-score</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">AUC-ROC</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Accuracy</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Precision</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">Recall</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">F1-score</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">AUC-ROC</th>
                        </tr>
                    </thead>
                    <tbody>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.86</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.86</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.86</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.86</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.92</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.86</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.86</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.86</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.86</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.92</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.79</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.79</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.79</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.79</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.79</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.79</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.79</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.79</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.79</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.79</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.86</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.87</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.86</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.86</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.92</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.86</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.87</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.86</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.86</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.93</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.84</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.86</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.84</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.84</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.91</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.84</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.85</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.84</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.84</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.91</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.66</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.67</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.66</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.66</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.68</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.55</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.92</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.55</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.65</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.63</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.86</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.86</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.86</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.86</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.92</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.86</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.87</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.86</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.86</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.93</td>
                        </tr>
                    </tbody>
                </table>
            </table-wrap>
            <p>
                <xref ref-type="table" rid="T4">Table 4</xref> shows the detailed results of classification performance obtained when testing on the 15% unbalanced dataset with respect to the chosen classifiers and four DSTs.</p>
            <p>
                <xref ref-type="table" rid="T5">Table 5</xref> shows the detailed results of classification performance obtained when testing on the 30% unbalanced dataset with respect to the chosen classifiers and four DSTs.</p>
            <p>The classification results are shown in terms of learning curves in this section from 
                <xref ref-type="fig" rid="f2">Figures 2</xref> to 
                <xref ref-type="fig" rid="f4">4</xref>. In this article, we display the results of the classifications with RF since RF yields the best performance results among the six classifiers. 
                <xref ref-type="fig" rid="f2">Figure 2</xref> indicates the learning curves of the classification performance with a 95%-5% (i.e., non-churn-churn) unbalanced rate of data compared with the original data and after four DSTs are applied.</p>
            <fig fig-type="figure" id="f2" orientation="portrait" position="float">
                <label>Figure 2. </label>
                <caption>
                    <p>Before and after sampling results of learning curves with the data (95%-5% of non-churn and churn rate) with RF classifier.</p>
                </caption>
                <graphic id="gr2" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/134711/6b017cdd-3923-4ae7-95ae-bb282ba864fc_figure2.gif"/>
            </fig>
            <fig fig-type="figure" id="f3" orientation="portrait" position="float">
                <label>Figure 3. </label>
                <caption>
                    <p>Before and after sampling results of learning curves with the data (85%-15% of non-churn and churn rate) with RF classifier.</p>
                </caption>
                <graphic id="gr3" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/134711/6b017cdd-3923-4ae7-95ae-bb282ba864fc_figure3.gif"/>
            </fig>
            <fig fig-type="figure" id="f4" orientation="portrait" position="float">
                <label>Figure 4. </label>
                <caption>
                    <p>Before and after sampling results of learning curves with the data (70%-30% of non-churn and churn rate) with RF classifier.</p>
                </caption>
                <graphic id="gr4" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/134711/6b017cdd-3923-4ae7-95ae-bb282ba864fc_figure4.gif"/>
            </fig>
            <p>
                <xref ref-type="fig" rid="f3">Figure 3</xref> reveals the learning curves of the classification performance with an 85%-15% (i.e., non-churn-churn) unbalanced rate of data compared with the original data and after four DSTs are applied.</p>
            <p>
                <xref ref-type="fig" rid="f4">Figure 4</xref> shows the learning curves of the classification performance with a 70%-30% (i.e., non-churn-churn) unbalanced rate of data compared with the original data and after four DSTs are applied.</p>
            <p>In our study, we have observed that a variable, is-senior, remained unbalanced even after applying the DSTs. The algorithmic fairness scores for each group with different unbalanced rates are described in 
                <xref ref-type="table" rid="T6">Tables 6</xref>&#x2013;
                <xref ref-type="table" rid="T8">8</xref>. 
                <xref ref-type="table" rid="T6">Table 6</xref> shows the comparative results of SP difference and DI scores calculated on 5% unbalanced dataset and the original dataset.</p>
            <table-wrap id="T6" orientation="portrait" position="float">
                <label>Table 6. </label>
                <caption>
                    <title>The algorithmic fairness measures on 5% unbalanced dataset.</title>
                </caption>
                <table content-type="article-table" frame="hsides">
                    <thead>
                        <tr>
                            <th align="left" colspan="1" rowspan="1" valign="top">Algorithmic fairness metrics</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">5% original data</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">5% imbalanced with ROS</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">5% imbalanced with RUS</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">5% imbalanced with SMOTE</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">5% imbalanced with ADASYN</th>
                        </tr>
                    </thead>
                    <tbody>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">RF</td>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">SP Difference</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.0056</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.0703</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.0524</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>0.1402</bold>
                            </td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>0.1401</bold>
                            </td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">DI</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.80</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.86</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.89</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>1.32</bold>
                            </td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>1.32</bold>
                            </td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">LightGBM</td>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">SP Difference</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.0067</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.0703</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.0524</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>0.1402</bold>
                            </td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>0.1390</bold>
                            </td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">DI</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>0.79</bold>
                            </td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.86</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.89</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>1.32</bold>
                            </td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>1.32</bold>
                            </td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">GB</td>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">SP Difference</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.0057</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.0854</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.5456</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>0.1349</bold>
                            </td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>0.1319</bold>
                            </td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">DI</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.80</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.84</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.89</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>1.31</bold>
                            </td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>1.29</bold>
                            </td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">LR</td>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">SP Difference</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.0024</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.0789</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.0227</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>0.1765</bold>
                            </td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>0.1591</bold>
                            </td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">DI</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>0.64</bold>
                            </td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.94</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.96</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>1.34</bold>
                            </td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>1.28</bold>
                            </td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">XGBoost</td>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">SP Difference</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.0069</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.0842</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.5446</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>0.1387</bold>
                            </td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>0.1383</bold>
                            </td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">DI</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>0.78</bold>
                            </td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.85</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.89</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>1.32</bold>
                            </td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>1.32</bold>
                            </td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">DT</td>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">SP Difference</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.0073</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.0698</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.0397</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>0.1331</bold>
                            </td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>0.1317</bold>
                            </td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">DI</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.86</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.87</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.91</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>1.30</bold>
                            </td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>1.29</bold>
                            </td>
                        </tr>
                    </tbody>
                </table>
            </table-wrap>
            <table-wrap id="T7" orientation="portrait" position="float">
                <label>Table 7. </label>
                <caption>
                    <title>The algorithmic fairness measures on 15% unbalanced dataset.</title>
                </caption>
                <table content-type="article-table" frame="hsides">
                    <thead>
                        <tr>
                            <th align="left" colspan="1" rowspan="1" valign="top">Algorithmic fairness metrics</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">15% original data</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">15% imbalanced with ROS</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">15% imbalanced with RUS</th>
                        </tr>
                    </thead>
                    <tbody>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">RF</td>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">SP Difference</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.009644</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.058223</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.049729</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">DI</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.871212</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.892587</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.891628</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">LightGBM</td>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">SP Difference</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.011044</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.070341</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.059666</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">DI</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.856964</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.853293</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.872208</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Gradient Boosting</td>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">SP Difference</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.006168</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.057517</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.055826</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">DI</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.913621</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.874225</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.877351</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Logistic Regression</td>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">SP Difference</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.007872</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.1004</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.01865</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">DI</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>0.711771</bold>
                            </td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>0.79581</bold>
                            </td>
                            <td align="left" colspan="1" rowspan="1" valign="top">1.03868</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">XGBoost</td>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">SP Difference</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.011008</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.069346</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.059144</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">DI</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.864076</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.858386</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.874791</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Decision Tree</td>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">SP Difference</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.016556</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.05406</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.016812</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">DI</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.900914</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.907414</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.967054</td>
                        </tr>
                    </tbody>
                </table>
            </table-wrap>
            <table-wrap id="T8" orientation="portrait" position="float">
                <label>Table 8. </label>
                <caption>
                    <title>The algorithmic fairness measures on 30% unbalanced dataset.</title>
                </caption>
                <table content-type="article-table" frame="hsides">
                    <thead>
                        <tr>
                            <th align="left" colspan="1" rowspan="1" valign="top">Algorithmic fairness metrics</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">30% original data</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">30% imbalanced with ROS</th>
                            <th align="left" colspan="1" rowspan="1" valign="top">30% imbalanced with RUS</th>
                        </tr>
                    </thead>
                    <tbody>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">RF</td>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">SP Difference</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.035246</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.046984</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.043816</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">DI</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.824513</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.911002</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.905239</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">LightGBM</td>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">SP Difference</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.036931</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.055603</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.059621</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">DI</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.811701</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.882727</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.872917</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Gradient Boosting</td>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">SP Difference</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.027146</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.04126</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.044747</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">DI</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.847531</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.910397</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.901757</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Logistic Regression</td>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">SP Difference</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.002424</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.011873</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.029787</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">DI</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>1.042928</bold>
                            </td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>1.033943</bold>
                            </td>
                            <td align="left" colspan="1" rowspan="1" valign="top">
                                <bold>1.063294</bold>
                            </td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">XGBoost</td>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">SP Difference</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.036665</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.056972</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.058089</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">DI</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.826001</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.881622</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.877416</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">Decision Tree</td>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                            <td colspan="1" rowspan="1"/>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">SP Difference</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.028866</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.033586</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">&#x2212;0.028326</td>
                        </tr>
                        <tr>
                            <td align="left" colspan="1" rowspan="1" valign="top">DI</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.911711</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.94218</td>
                            <td align="left" colspan="1" rowspan="1" valign="top">0.944787</td>
                        </tr>
                    </tbody>
                </table>
            </table-wrap>
            <p>
                <xref ref-type="table" rid="T7">Table 7</xref> displays the comparative results of SP difference and DI scores calculated on the 15% unbalanced dataset and the original dataset.</p>
            <p>
                <xref ref-type="table" rid="T8">Table 8</xref> describes the comparative results of SP difference and DI scores calculated on the 30% unbalanced dataset and the original dataset.</p>
        </sec>
        <sec id="sec8">
            <title>Discussion</title>
            <sec id="sec9">
                <title>Overview of experimental results</title>
                <p>Recent work in algorithmic fairness research in machine learning applications is broadly organized into three main trends. Some studies emphasize enhancing or proposing better fairness notions and evaluation metrics in line with the domains concerned,
                    <xref ref-type="bibr" rid="ref21">
                        <sup>21</sup>
                    </xref>
                    <sup>,</sup>
                    <xref ref-type="bibr" rid="ref25">
                        <sup>25</sup>
                    </xref> some focus on the ways to mitigate the bias in the classification process (which can further be divided into three main groups: pre-, in-, and post-processing techniques),
                    <xref ref-type="bibr" rid="ref26">
                        <sup>26</sup>
                    </xref>
                    <sup>&#x2013;</sup>
                    <xref ref-type="bibr" rid="ref29">
                        <sup>29</sup>
                    </xref> while the last trend proposes how to maintain the ethical AI standards and policies in practicing machine learning applications in different sectors.
                    <xref ref-type="bibr" rid="ref30">
                        <sup>30</sup>
                    </xref>
                    <sup>,</sup>
                    <xref ref-type="bibr" rid="ref31">
                        <sup>31</sup>
                    </xref>
                </p>
                <p>Despite some previous empirical studies on the impact of using preprocessing techniques on algorithmic fairness, the findings of previous works could not pinpoint the direct impact of using DSTs on algorithmic fairness. Louren&#x00e7;o and Antunes,
                    <xref ref-type="bibr" rid="ref10">
                        <sup>10</sup>
                    </xref> whose work is the closest to our research, distinguish the effect of data preparation on algorithmic fairness. However, their work was tested with only two small datasets and provides general results using random under- and over-sampling DSTs. Importantly, their work was not tested on the widely applied DSTs, SMOTE and ADASYN. In contrast, we apply real-world business data and show the influence of different DSTs at different levels of imbalance rate.</p>
                <p>In the classification task, RF seems to be the best classifier since it yielded the best results over the other five models based on their AUC-ROC scores, while LR provided the worst scores for almost all metrics. It was observed that RUS worked better for the extremely unbalanced situation compared with the 15% and 30% imbalanced rates. The best outcomes were found via ROS, SMOTE, and ADASYN at all unbalanced rates; thus, it could be concluded that oversampling techniques seem to provide more promising prediction results than undersampling techniques. This might be because the undersampling technique modifies the data by decreasing the number of majority-class instances, which makes the dataset lack useful information for learning. By observing the learning curves, the performance of the classifier increases considerably after applying DSTs in all versions of the data. But in the 5% unbalanced version, the performance results are better than in the other two versions of the data. The classifier might ignore the 5% of churns since the churn rate is too low. The data size used in the experiment is small; it would be better to use more training examples to see whether the models are well fit or not.</p>
                <p>For all three unbalanced rates, the original dataset always gave smaller statistical parity differences (SPD) compared to the sampled datasets created using the four DSTs, while datasets with RUS and ROS yielded a slightly larger SPD, but the statistics showed there is no disparate impact. However, we can hypothetically consider that there might still be a bias, as both RUS and ROS have their limitations. With RUS, important information could have been removed and the classifier could provide a biased result since there was less information to learn from. On the other hand, with ROS, the prediction performance could also be biased due to the overfitting problem. In this sense, it is advisable to apply different fairness measures and to compare the fairness scores. For the DI scores, if the DI is less than 0.8, there is indirect discrimination towards the unprotected group. The mathematical equivalence of DI suggests equalizing the outcomes between protected and unprotected groups. However, in reality, the conditions in the context of interest drive us to allow DI to a specific group up to some percentage. For example, in telecom CCP, the number of female customers in the dataset could be much smaller, since most males usually apply for a network plan representing the whole household. Therefore, we assume that considering DI with the 80% rule is reasonable.</p>
                <p>In the 5% unbalanced original dataset, LGBM, LR and XG-Boost showed DI values of 0.79, 0.64, and 0.78, respectively. But there is no DI in the other two original datasets for 15% and 30%. This reveals that more discrimination could occur on a more unbalanced dataset. The analysis of all datasets with SMOTE and ADASYN provides alarming information on the classifier&#x2019;s discrimination against the unprotected group. The 30% unbalanced dataset yields the worst unfair results since this is the highest SPD between female and male groups with LR of 0.38 and 0.43, respectively. Overall, among all DSTs, ADASYN and SMOTE tend to provide more unfair outcomes compared to other DSTs. In contrast, they both provide a better classification performance in comparison to RUS and ROS. There is not a huge difference among the three different data unbalanced levels. However, in this study, we experimented with the gender attribute as a sensitive variable.</p>
            </sec>
            <sec id="sec10">
                <title>Opportunities and challenges</title>
                <p>Due to the nature of the CCP process and the rarity issue, training datasets have high chances to have compounded bias and suffer from unbalanced problems not only for the target class but also in the other attributes including sensitive variables. We have noticed that one variable remained unbalanced even after applying the DSTs; in such a case, a careful selection of data attributes should be done to avoid selection bias.</p>
                <p>As the quality of training data is important, we would suggest enhanced mechanisms of data repairing techniques to prevent bias in the training data. Furthermore, the algorithmic fairness problem mostly concerns societal discrimination. For example, in the scholarship selection process, if classifiers favor males over females who have the same qualifications but are not selected, this will decrease the females&#x2019; chances of a scholarship. In a profit-centered industry like telecom, one could think there will be no loss for the customers even though a group is less or more favored. It is important to consider the impact of biased decisions for the sake of the company&#x2019;s reputation, the importance of equal treatment to customers, and to practice ethical AI policies.</p>
            </sec>
        </sec>
        <sec id="sec11" sec-type="conclusions">
            <title>Conclusions</title>
            <p>In this paper, we experimented on three versions of unbalanced real-world telecom datasets to assess the impact of using four types of DSTs on the algorithmic fairness in the CCP process and compared the results with the unsampled original dataset. Classification performance and algorithmic fairness were evaluated with well-known metrics. The outcomes imply that RF provides the best classification results. Using SMOTE and ADASYN yields larger SPD between male and female groups as well as a disparate impact on the female over the male group. Previous work emphasizes the use of this method in choosing a scholarship candidate, releasing prisoners on parole, and choosing a credible candidate. Since machine learning applications will soon be applied to almost every sector, the practice of using fairer or unbiased systems is essential. Our study highlights the importance of paying attention to algorithmic fairness in the machine-driven decision-making process of the profit-centered and customer-oriented sectors, on which very little research work has been done. Particularly, our finding highlights the fact that a careful choice of DSTs must be made to achieve unbiased prediction results. Despite the experiment being done on the real-world telecom dataset with 100,000 records, the data size is still considered to be very small and the experimental results on a much larger amount of data could vary. Since the data size is small, the results on the learning curves are not remarkable. Another limitation is that we use only one sensitive attribute, gender, in our study. In future work, we would like to test the same procedure on a much larger dataset and would like to measure more algorithmic fairness metrics to investigate the most suitable algorithmic measures for the CCP task. Moreover, we would like to test with more sensitive variables rather than just gender.</p>
        </sec>
        <sec id="sec12">
            <title>Data availability</title>
            <sec id="sec13">
                <title>Underlying data</title>
                <p>The real-world telecom dataset was obtained from the Business Intelligence and Analytics department of Telekom Malaysia Bhd. The authors were required to go through a strict approval process following an established data governance framework. Interested readers/reviewers may contact the Business Intelligence and Analytics department to request the data (
                    <email xlink:href="mailto:technicalsuport@tm.com.my">technicalsuport@tm.com.my</email>). The decision as to whether or not to grant access to the data is at the discretion of Telekom Malaysia Bhd.</p>
                <p>As most telco companies own similar customer data, other customer churn datasets that are representative of the data being used in this research can be found as follows:
                    <list list-type="order">
                        <list-item>
                            <label>1.</label>
                            <p>
                                <ext-link ext-link-type="uri" xlink:href="https://www.ibm.com/docs/en/cognos-analytics/11.1.0?topic=samples-telco-customer-churn">https://www.ibm.com/docs/en/cognos-analytics/11.1.0?topic=samples-telco-customer-churn</ext-link>.</p>
                        </list-item>
                        <list-item>
                            <label>2.</label>
                            <p>
                                <ext-link ext-link-type="uri" xlink:href="https://datasetsearch.research.google.com/search?query=Telco%20Customer%20Churn%20dataset%20site%3Akaggle.com&amp;docid=L2cvMTFsbDF0dzJ5NA%3D%3D">https://datasetsearch.research.google.com/search?query=Telco%20Customer%20Churn%20dataset%20site%3Akaggle.com&amp;docid=L2cvMTFsbDF0dzJ5NA%3D%3D</ext-link>.</p>
                        </list-item>
                    </list>
                </p>
            </sec>
            <sec id="sec14">
                <title>Extended data</title>
                <p>Analysis code available from: 
                    <ext-link ext-link-type="uri" xlink:href="https://github.com/mawmaw/fairness_churn">https://github.com/mawmaw/fairness_churn</ext-link>.</p>
                <p>Archived analysis code as at time of publication: 
                    <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.5281/zenodo.5516218">https://doi.org/10.5281/zenodo.5516218</ext-link>.
                    <xref ref-type="bibr" rid="ref32">
                        <sup>32</sup>
                    </xref>
                </p>
                <p>License: 
                    <ext-link ext-link-type="uri" xlink:href="https://opensource.org/licenses/MIT">MIT License</ext-link>.</p>
            </sec>
        </sec>
    </body>
    <back>
        <ref-list>
            <title>References</title>
            <ref id="ref1">
                <label>1</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Eria</surname>
                            <given-names>K</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Marikannan</surname>
                            <given-names>BP</given-names>
                        </name>
</person-group>:
                    <article-title>Systematic Review of Customer Churn Prediction in the Telecom Sector.</article-title>
                    <year>2018</year>; vol.<volume>2</volume>(no.<issue>1</issue>).</mixed-citation>
            </ref>
            <ref id="ref2">
                <label>2</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Amin</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Anwar</surname>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Comparing Oversampling Techniques to Handle the Class Imbalance Problem: A Customer Churn Prediction Case Study.</article-title>
                    <source>

                        <italic toggle="yes">IEEE Access.</italic>
</source>
                    <year>2016</year>;<volume>4</volume>(<issue>Ml</issue>):<fpage>7940</fpage>&#x2013;<lpage>7957</lpage>.
                    <pub-id pub-id-type="doi">10.1109/ACCESS.2016.2619719</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref3">
                <label>3</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Gui</surname>
                            <given-names>C</given-names>
                        </name>
</person-group>:
                    <article-title>Analysis of imbalanced data set problem: The case of churn prediction for telecommunication.</article-title>
                    <year>2017</year>; vol.<volume>6</volume>(no.<issue>2</issue>): pp.<fpage>93</fpage>&#x2013;<lpage>99</lpage>.</mixed-citation>
            </ref>
            <ref id="ref4">
                <label>4</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Dieterich</surname>
                            <given-names>W</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Mendoza</surname>
                            <given-names>C</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Brennan</surname>
                            <given-names>T</given-names>
                        </name>
</person-group>:
                    <article-title>COMPAS Risk Scales: Demonstrating Accuracy Equity and Predictive Parity.</article-title>
                    <year>2016</year>.</mixed-citation>
            </ref>
            <ref id="ref5">
                <label>5</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Ntoutsi</surname>
                            <given-names>E</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Bias in Data-driven AI Systems &#x2013; An Introductory Survey.</article-title>
                    <year>2020</year>; pp.<fpage>1</fpage>&#x2013;<lpage>19</lpage>.</mixed-citation>
            </ref>
            <ref id="ref6">
                <label>6</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Kamiran</surname>
                            <given-names>F</given-names>
                        </name>

                        <name name-style="western">
                            <surname>&#x017d;liobait&#x0117;</surname>
                            <given-names>I</given-names>
                        </name>
</person-group>:
                    <article-title>Explainable and non-explainable discrimination in classification.</article-title>
                    <source>

                        <italic toggle="yes">Stud. Appl. Philos. Epistemol. Ration. Ethics.</italic>
</source>
                    <year>2013</year>;<volume>3</volume>(<issue>January 2012</issue>):<fpage>155</fpage>&#x2013;<lpage>170</lpage>.
                    <pub-id pub-id-type="doi">10.1007/978-3-642-30487-3_8</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref7">
                <label>7</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Barocas</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Selbst</surname>
                            <given-names>AD</given-names>
                        </name>
</person-group>:
                    <article-title>Big Data&#x2019;s Disparate Impact.</article-title>
                    <year>2016</year>; vol.<volume>671</volume>: pp.<fpage>671</fpage>&#x2013;<lpage>732</lpage>.</mixed-citation>
            </ref>
            <ref id="ref8">
                <label>8</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Ballings</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Van Den Poel</surname>
                            <given-names>D</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Verhagen</surname>
                            <given-names>E</given-names>
                        </name>
</person-group>:
                    <article-title>Improving customer churn prediction by data augmentation using pictorial stimulus-choice data.</article-title>
                    <source>

                        <italic toggle="yes">Adv. Intell. Syst. Comput.</italic>
</source>
                    <year>2012</year>;<volume>171</volume>(<issue>1</issue>):<fpage>217</fpage>&#x2013;<lpage>226</lpage>.
                    <pub-id pub-id-type="doi">10.1007/978-3-642-30864-2_21</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref9">
                <label>9</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Mehrotra</surname>
                            <given-names>R</given-names>
                        </name>

                        <name name-style="western">
                            <surname>McInerney</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Bouchard</surname>
                            <given-names>H</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Towards a fair marketplace: Counterfactual evaluation of the trade-off between relevance, fairness &amp; satisfaction in recommendation systems.</article-title>
                    <source>

                        <italic toggle="yes">Int. Conf. Inf. Knowl. Manag. Proc.</italic>
</source>
                    <year>2018</year>:<fpage>2243</fpage>&#x2013;<lpage>2252</lpage>.</mixed-citation>
            </ref>
            <ref id="ref10">
                <label>10</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Louren&#x00e7;o</surname>
                            <given-names>N</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Antunes</surname>
                            <given-names>N</given-names>
                        </name>
</person-group>:
                    <article-title>The Impact of Data Preparation on the Fairness of Software Systems.</article-title>
                </mixed-citation>
            </ref>
            <ref id="ref11">
                <label>11</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Beutel</surname>
                            <given-names>A</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Fairness in recommendation ranking through pairwise comparisons.</article-title>
                    <source>

                        <italic toggle="yes">Proc. ACM SIGKDD Int. Conf. Knowl. Discov. Data Min.</italic>
</source>
                    <year>2019</year>;<fpage>2212</fpage>&#x2013;<lpage>2220</lpage>.</mixed-citation>
            </ref>
            <ref id="ref12">
                <label>12</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Patro</surname>
                            <given-names>GK</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Biswas</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ganguly</surname>
                            <given-names>N</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>FairRec: Two-Sided Fairness for Personalized Recommendations in Two-Sided Platforms.</article-title>
                    <source>

                        <italic toggle="yes">Web Conf. 2020 - Proc. World Wide Web Conf. WWW 2020.</italic>
</source>
                    <year>2020</year>;<volume>2</volume>:<fpage>1194</fpage>&#x2013;<lpage>1204</lpage>.</mixed-citation>
            </ref>
            <ref id="ref13">
                <label>13</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Columelli</surname>
                            <given-names>L</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Nunez-Del-Prado</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Zarate-Gamarra</surname>
                            <given-names>L</given-names>
                        </name>
</person-group>:
                    <article-title>Measuring churner influence on pre-paid subscribers using fuzzy logic.</article-title>
                    <source>

                        <italic toggle="yes">Proc. 2016 42nd Lat. Am. Comput. Conf. CLEI 2016.</italic>
</source>
                    <year>2017</year>.</mixed-citation>
            </ref>
            <ref id="ref14">
                <label>14</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Ahmed</surname>
                            <given-names>U</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Khan</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Khan</surname>
                            <given-names>SH</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Transfer Learning and Meta Classification Based Deep Churn Prediction System for Telecom Industry.</article-title>
                    <year>2019</year>; pp. <fpage>1</fpage>&#x2013;<lpage>9</lpage>.</mixed-citation>
            </ref>
            <ref id="ref15">
                <label>15</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Mohammed</surname>
                            <given-names>R</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Rawashdeh</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Abdullah</surname>
                            <given-names>M</given-names>
                        </name>
</person-group>:
                    <article-title>Machine Learning with Oversampling and Undersampling Techniques: Overview Study and Experimental Results.</article-title>
                    <source>

                        <italic toggle="yes">2020 11th Int. Conf. Inf. Commun. Syst. ICICS 2020.</italic>
</source>
                    <year>2020</year>;<fpage>243</fpage>&#x2013;<lpage>248</lpage>.</mixed-citation>
            </ref>
            <ref id="ref16">
                <label>16</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Chawla</surname>
                            <given-names>NV</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Bowyer</surname>
                            <given-names>KW</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Hall</surname>
                            <given-names>LO</given-names>
                        </name>
</person-group>:
                    <article-title>SMOTE: Synthetic Minority Over-sampling Technique.</article-title>
                    <year>2002</year>;<volume>16</volume>:<fpage>321</fpage>&#x2013;<lpage>357</lpage>.</mixed-citation>
            </ref>
            <ref id="ref17">
                <label>17</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Gosain</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Sardana</surname>
                            <given-names>S</given-names>
                        </name>
</person-group>:
                    <article-title>Handling class imbalance problem using oversampling techniques: A review.</article-title>
                    <source>

                        <italic toggle="yes">2017 Int. Conf. Adv. Comput. Commun. Informatics, ICACCI 2017.</italic>
</source>
                    <year>2017</year>;<volume>2017-January</volume>:<fpage>79</fpage>&#x2013;<lpage>85</lpage>.</mixed-citation>
            </ref>
            <ref id="ref18">
                <label>18</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Kotsiantis</surname>
                            <given-names>SB</given-names>
                        </name>
</person-group>:
                    <article-title>Supervised Machine Learning: A Review of Classification Techniques.</article-title>
                    <source>

                        <italic toggle="yes">Informatica.</italic>
</source>
                    <year>2007</year>;<volume>31</volume>(<issue>2007</issue>):<fpage>249</fpage>&#x2013;<lpage>268</lpage>.</mixed-citation>
            </ref>
            <ref id="ref19">
                <label>19</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Kotu</surname>
                            <given-names>V</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Deshpande</surname>
                            <given-names>B</given-names>
                        </name>
</person-group>:
                    <article-title>Model Evaluation.</article-title>
                    <source>

                        <italic toggle="yes">Predict. Anal. Data Min.</italic>
</source>
                    <year>2015</year>;<fpage>257</fpage>&#x2013;<lpage>273</lpage>.
                    <pub-id pub-id-type="doi">10.1016/B978-0-12-801460-8.00008-2</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref20">
                <label>20</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Bellamy</surname>
                            <given-names>RKE</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>AI Fairness 360: An Extensible Toolkit for Detecting, Understanding, and Mitigating Unwanted Algorithmic Bias.</article-title>
                    <year>2018</year>.</mixed-citation>
            </ref>
            <ref id="ref21">
                <label>21</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Dwork</surname>
                            <given-names>C</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Hardt</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Pitassi</surname>
                            <given-names>T</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Fairness through awareness.</article-title>
                    <source>

                        <italic toggle="yes">ITCS 2012 - Innov. Theor. Comput. Sci. Conf.</italic>
</source>
                    <year>2012</year>;<fpage>214</fpage>&#x2013;<lpage>226</lpage>.</mixed-citation>
            </ref>
            <ref id="ref22">
                <label>22</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Calmon</surname>
                            <given-names>FP</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Wei</surname>
                            <given-names>D</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ramamurthy</surname>
                            <given-names>KN</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Optimized Data Pre-Processing for Discrimination Prevention.</article-title>
                    <year>2017</year>;<fpage>1</fpage>&#x2013;<lpage>18</lpage>.</mixed-citation>
            </ref>
            <ref id="ref23">
                <label>23</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Feldman</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Friedler</surname>
                            <given-names>SA</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Moeller</surname>
                            <given-names>J</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Certifying and removing disparate impact.</article-title>
                    <source>

                        <italic toggle="yes">Proc. ACM SIGKDD Int. Conf. Knowl. Discov. Data Min.</italic>
</source>
                    <year>2015</year>;<volume>2015-August</volume>:<fpage>259</fpage>&#x2013;<lpage>268</lpage>.</mixed-citation>
            </ref>
            <ref id="ref24">
                <label>24</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Zafar</surname>
                            <given-names>MB</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Valera</surname>
                            <given-names>I</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Rodriguez</surname>
                            <given-names>MG</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Fairness Constraints: Mechanisms for Fair Classification.</article-title>
                    <year>2017</year>;<volume>54</volume>.</mixed-citation>
            </ref>
            <ref id="ref25">
                <label>25</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Hardt</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Price</surname>
                            <given-names>E</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Srebro</surname>
                            <given-names>N</given-names>
                        </name>
</person-group>:
                    <article-title>Equality of opportunity in supervised learning.</article-title>
                    <source>

                        <italic toggle="yes">Adv. Neural Inf. Proces. Syst.</italic>
</source>
                    <year>2016</year>;<fpage>3323</fpage>&#x2013;<lpage>3331</lpage>.</mixed-citation>
            </ref>
            <ref id="ref26">
                <label>26</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Zemel</surname>
                            <given-names>R</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Wu</surname>
                            <given-names>Y</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Swersky</surname>
                            <given-names>K</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Learning fair representations.</article-title>
                    <source>

                        <italic toggle="yes">30th Int. Conf. Mach. Learn. ICML 2013.</italic>
</source>
                    <year>2013</year>;<volume>28</volume>(<issue>PART 2</issue>):<fpage>1362</fpage>&#x2013;<lpage>1370</lpage>.</mixed-citation>
            </ref>
            <ref id="ref27">
                <label>27</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Calmon</surname>
                            <given-names>FP</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Wei</surname>
                            <given-names>D</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Vinzamuri</surname>
                            <given-names>B</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Optimized pre-processing for discrimination prevention.</article-title>
                    <source>

                        <italic toggle="yes">Adv. Neural Inf. Proces. Syst.</italic>
</source>
                    <year>2017</year>;<volume>2017-December</volume>(<issue>Nips</issue>):<fpage>3993</fpage>&#x2013;<lpage>4002</lpage>.</mixed-citation>
            </ref>
            <ref id="ref28">
                <label>28</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Zhang</surname>
                            <given-names>BH</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Lemoine</surname>
                            <given-names>B</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Mitchell</surname>
                            <given-names>M</given-names>
                        </name>
</person-group>:
                    <article-title>Mitigating Unwanted Biases with Adversarial Learning.</article-title>
                    <source>

                        <italic toggle="yes">AIES 2018 - Proc. 2018 AAAI/ACM Conf. AI, Ethics, Soc.</italic>
</source>
                    <year>2018</year>;<fpage>335</fpage>&#x2013;<lpage>340</lpage>.</mixed-citation>
            </ref>
            <ref id="ref29">
                <label>29</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Pleiss</surname>
                            <given-names>G</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Raghavan</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Wu</surname>
                            <given-names>F</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>On fairness and calibration.</article-title>
                    <source>

                        <italic toggle="yes">Adv. Neural Inf. Proces. Syst.</italic>
</source>
                    <year>2017</year>;<volume>2017-December</volume>(<issue>Nips</issue>):<fpage>5681</fpage>&#x2013;<lpage>5690</lpage>.</mixed-citation>
            </ref>
            <ref id="ref30">
                <label>30</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Gursoy</surname>
                            <given-names>ME</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Tamersoy</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Truex</surname>
                            <given-names>S</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Secure and Utility-Aware Data Collection with Condensed Local Differential Privacy.</article-title>
                    <source>

                        <italic toggle="yes">IEEE Trans. Dependable Secur. Comput.</italic>
</source>
                    <year>2019</year>;<volume>X</volume>:<fpage>1</fpage>&#x2013;<lpage>1</lpage>.</mixed-citation>
            </ref>
            <ref id="ref31">
                <label>31</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Hube</surname>
                            <given-names>C</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Fetahu</surname>
                            <given-names>B</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Gadiraju</surname>
                            <given-names>U</given-names>
                        </name>
</person-group>:
                    <article-title>Understanding and mitigating worker biases in the crowdsourced collection of subjective judgments.</article-title>
                    <source>

                        <italic toggle="yes">Conf. Hum. Factors Comput. Syst. - Proc.</italic>
</source>
                    <year>2019</year>.</mixed-citation>
            </ref>
            <ref id="ref32">
                <label>32</label>
                <mixed-citation publication-type="journal">
                    <collab>mawmaw</collab>:
                    <article-title>mawmaw/fairness_churn: Initial (v1.0).</article-title>
                    <source>

                        <italic toggle="yes">Zenodo.</italic>
</source>
                    <year>2021</year>.
                    <pub-id pub-id-type="doi">10.5281/zenodo.5516218</pub-id>
                </mixed-citation>
            </ref>
        </ref-list>
    </back>
    <sub-article article-type="reviewer-report" id="report148204">
        <front-stub>
            <article-id pub-id-type="doi">10.5256/f1000research.134711.r148204</article-id>
            <title-group>
                <article-title>Reviewer response for version 2</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author">
                    <name>
                        <surname>Khor</surname>
                        <given-names>Kok-Chin</given-names>
                    </name>
                    <xref ref-type="aff" rid="r148204a1">1</xref>
                    <role>Referee</role>
                    <uri content-type="orcid">https://orcid.org/0000-0001-9346-1479</uri>
                </contrib>
                <aff id="r148204a1">
                    <label>1</label>Department of Internet Engineering and Computer Science, Lee Kong Chian Faculty of Engineering and Science, Universiti Tunku Abdul Rahman, Selangor, Malaysia</aff>
            </contrib-group>
            <author-notes>
                <fn fn-type="conflict">
                    <p>
                        <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>30</day>
                <month>8</month>
                <year>2022</year>
            </pub-date>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2022 Khor KC</copyright-statement>
                <copyright-year>2022</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access peer review report distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <related-article ext-link-type="doi" id="relatedArticleReport148204" related-article-type="peer-reviewed-article" xlink:href="10.12688/f1000research.72929.2"/>
            <custom-meta-group>
                <custom-meta>
                    <meta-name>recommendation</meta-name>
                    <meta-value>approve</meta-value>
                </custom-meta>
            </custom-meta-group>
        </front-stub>
        <body>
            <p>DSTs (one under-sampling and three over-samplings) were applied to the three datasets. However, it would be clearer for the readers if the authors could specify how many percent of the major class was under-sampled and how many percent of the minor class was over-sampled.</p>
            <p> </p>
            <p> It is observed that a variable, "is-senior", remained unbalanced even after applying the DSTs. Please explain whether the DSTs (under-sampling or over-sampling?) were applied to the whole dataset dimension or some specific variables only.</p>
            <p> </p>
            <p> It is stated that the under-sampling technique modifies the data by decreasing the majority of instances, which makes the dataset lack useful information for learning. But this should benefit the classification algorithm as the impact from the major class reduces. The authors can probably explain from the perspective of the rarity of the churn class.</p>
            <p> </p>
            <p> For the tables, please state the reason certain numbers are bolded. Suggest to put the reason in the caption.</p>
            <p>Is the work clearly and accurately presented and does it cite the current literature?</p>
            <p>Yes</p>
            <p>If applicable, is the statistical analysis and its interpretation appropriate?</p>
            <p>Yes</p>
            <p>Are all the source data underlying the results available to ensure full reproducibility?</p>
            <p>Partly</p>
            <p>Is the study design appropriate and is the work technically sound?</p>
            <p>Yes</p>
            <p>Are the conclusions drawn adequately supported by the results?</p>
            <p>Yes</p>
            <p>Are sufficient details of methods and analysis provided to allow replication by others?</p>
            <p>Partly</p>
            <p>Reviewer Expertise:</p>
            <p>Data Mining and Computer Networks</p>
            <p>I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard.</p>
        </body>
    </sub-article>
    <sub-article article-type="reviewer-report" id="report120732">
        <front-stub>
            <article-id pub-id-type="doi">10.5256/f1000research.76542.r120732</article-id>
            <title-group>
                <article-title>Reviewer response for version 1</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author">
                    <name>
                        <surname>P</surname>
                        <given-names>Prabu</given-names>
                    </name>
                    <xref ref-type="aff" rid="r120732a1">1</xref>
                    <role>Referee</role>
                    <uri content-type="orcid">https://orcid.org/0000-0002-4971-0340</uri>
                </contrib>
                <aff id="r120732a1">
                    <label>1</label>Department of Computer Science, CHRIST University, Bengaluru, Karnataka, 560029, India</aff>
            </contrib-group>
            <author-notes>
                <fn fn-type="conflict">
                    <p>
                        <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>24</day>
                <month>2</month>
                <year>2022</year>
            </pub-date>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2022 P P</copyright-statement>
                <copyright-year>2022</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access peer review report distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <related-article ext-link-type="doi" id="relatedArticleReport120732" related-article-type="peer-reviewed-article" xlink:href="10.12688/f1000research.72929.1"/>
            <custom-meta-group>
                <custom-meta>
                    <meta-name>recommendation</meta-name>
                    <meta-value>approve-with-reservations</meta-value>
                </custom-meta>
            </custom-meta-group>
        </front-stub>
        <body>
            <p>This work mainly focuses on data imbalance problems in customer churn prediction. The author has to incorporate the following suggestions in his/her article in order to improve the quality of the work: 
                <list list-type="order">
                    <list-item>
                        <p>The author needs to clearly define performance metrics for comparing the random forest classifier with other classifiers.&#x00a0;</p>
                    </list-item>
                    <list-item>
                        <p>More experimental studies are required for unequally balanced datasets, and the author is expected to try the experiment with large datasets.</p>
                    </list-item>
                    <list-item>
                        <p>The author needs to identify a proper technique to manage outliers in the datasets instead of removing them manually.</p>
                    </list-item>
                    <list-item>
                        <p>The results and the algorithmic fairness need to be explained properly.</p>
                    </list-item>
                    <list-item>
                        <p>The introduction section is too short and the section is not continuous. Correct it. The introduction section does not clearly explain the basics and also lacks some details; "issues in existing work, scope of work, goal of work and organization of work" can be included.</p>
                    </list-item>
                    <list-item>
                        <p>No significant limitations are discussed. A number of limitations and learning points should also be considered after the conclusion.</p>
                    </list-item>
                    <list-item>
                        <p>The author could take more recent research papers for the literature review, as most of the literature review papers are not suitable to the proposed work. The author is expected to include a few existing recommender systems suggested for e-commerce platforms. So I would suggest the author should identify a few more recent papers based on his proposed research work and also include the objective and limitations of each work in table format.</p>
                    </list-item>
                </list>
            </p>
            <p>Is the work clearly and accurately presented and does it cite the current literature?</p>
            <p>Yes</p>
            <p>If applicable, is the statistical analysis and its interpretation appropriate?</p>
            <p>Partly</p>
            <p>Are all the source data underlying the results available to ensure full reproducibility?</p>
            <p>Partly</p>
            <p>Is the study design appropriate and is the work technically sound?</p>
            <p>Yes</p>
            <p>Are the conclusions drawn adequately supported by the results?</p>
            <p>Yes</p>
            <p>Are sufficient details of methods and analysis provided to allow replication by others?</p>
            <p>Yes</p>
            <p>Reviewer Expertise:</p>
            <p>Machine Learning, Cloud Computing and Mathematical Modeling</p>
            <p>I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard, however I have significant reservations, as outlined above.</p>
        </body>
        <sub-article article-type="response" id="comment8344-120732">
            <front-stub>
                <contrib-group>
                    <contrib contrib-type="author">
                        <name>
                            <surname>Haw</surname>
                            <given-names>Su Cheng</given-names>
                        </name>
                        <aff/>
                    </contrib>
                </contrib-group>
                <author-notes>
                    <fn fn-type="conflict">
                        <p>
                            <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                    </fn>
                </author-notes>
                <pub-date pub-type="epub">
                    <day>6</day>
                    <month>6</month>
                    <year>2022</year>
                </pub-date>
            </front-stub>
            <body>
                <p>
                    <list list-type="order">
                        <list-item>
                            <p>The AUC-ROC score was applied to compare the performance of the respective classifiers, and this information has been added for clarification in the discussion section/performance measure section.</p>
                        </list-item>
                        <list-item>
                            <p>Due to the limited availability of the proprietary company dataset, the focus of this paper is on 100,000 records only. Testing with a large dataset is left as future work. In future work, we would like to test the same procedure on a larger dataset and would like to measure more algorithmic fairness metrics to investigate the most suitable algorithmic measures for the CCP task.&#x00a0;</p>
                        </list-item>
                        <list-item>
                            <p>Since there were not a lot of outliers in the original dataset, they were manually removed and we added this fact in the data pre-processing section.</p>
                        </list-item>
                        <list-item>
                            <p>In our study, we do not yet go further into the process of mitigating algorithmic unfairness. If we did, we would need to apply and discuss it. Here, we only evaluate and compare the bias in the training dataset before and after applying the data sampling techniques.&#x00a0;</p>
                        </list-item>
                        <list-item>
                            <p>The introduction section has been updated with related works, the scope of the work and the organization of the work. Since our background is a combination of three different topics (algorithmic fairness, customer churn prediction and data sampling techniques) and due to the page limitation, we could not put a lot of material in the introduction section, but the most important and relevant background study is provided in the introduction section in a compact way. The goal of the work is provided in the last paragraph of the introduction section: &#x201c;The main goal of this study is to explore and identify the impact of using DSTs on training data on algorithmic fairness in the CCP process.&#x201d;</p>
                        </list-item>
                        <list-item>
                            <p>We updated the conclusion section with the limitations, as per the suggestion.</p>
                        </list-item>
                        <list-item>
                            <p>Although there is very limited work related to our work, we organized some of the articles on fairness in recommender systems in table format, as per your suggestion.</p>
                        </list-item>
                    </list>
                </p>
            </body>
        </sub-article>
    </sub-article>
    <sub-article article-type="reviewer-report" id="report96164">
        <front-stub>
            <article-id pub-id-type="doi">10.5256/f1000research.76542.r96164</article-id>
            <title-group>
                <article-title>Reviewer response for version 1</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author">
                    <name>
                        <surname>Loo</surname>
                        <given-names>Chu Kiong</given-names>
                    </name>
                    <xref ref-type="aff" rid="r96164a1">1</xref>
                    <role>Referee</role>
                    <uri content-type="orcid">https://orcid.org/0000-0001-7867-2665</uri>
                </contrib>
                <aff id="r96164a1">
                    <label>1</label>Faculty of Computer Science and Information Technology, University of Malaya, Kuala Lumpur, Malaysia</aff>
            </contrib-group>
            <author-notes>
                <fn fn-type="conflict">
                    <p>
                        <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>11</day>
                <month>1</month>
                <year>2022</year>
            </pub-date>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2022 Loo CK</copyright-statement>
                <copyright-year>2022</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access peer review report distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <related-article ext-link-type="doi" id="relatedArticleReport96164" related-article-type="peer-reviewed-article" xlink:href="10.12688/f1000research.72929.1"/>
            <custom-meta-group>
                <custom-meta>
                    <meta-name>recommendation</meta-name>
                    <meta-value>approve-with-reservations</meta-value>
                </custom-meta>
            </custom-meta-group>
        </front-stub>
        <body>
            <p>
                <list list-type="bullet">
                    <list-item>
                        <p>This paper addresses an important issue of algorithmic fairness, i.e. to investigate whether the results pose any discrimination between male and female groups and compare the results before and after using DSTs.&#x00a0;</p>
                    </list-item>
                    <list-item>
                        <p>I suggest adding some discussion of the confusion matrix, learning curve and the improvement of the fairness index before and after implementing the DSTs.</p>
                    </list-item>
                </list>
            </p>
            <p>Is the work clearly and accurately presented and does it cite the current literature?</p>
            <p>Yes</p>
            <p>If applicable, is the statistical analysis and its interpretation appropriate?</p>
            <p>Yes</p>
            <p>Are all the source data underlying the results available to ensure full reproducibility?</p>
            <p>Yes</p>
            <p>Is the study design appropriate and is the work technically sound?</p>
            <p>Yes</p>
            <p>Are the conclusions drawn adequately supported by the results?</p>
            <p>Yes</p>
            <p>Are sufficient details of methods and analysis provided to allow replication by others?</p>
            <p>Yes</p>
            <p>Reviewer Expertise:</p>
            <p>Machine learning</p>
            <p>I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard, however I have significant reservations, as outlined above.</p>
        </body>
        <sub-article article-type="response" id="comment8343-96164">
            <front-stub>
                <contrib-group>
                    <contrib contrib-type="author">
                        <name>
                            <surname>Haw</surname>
                            <given-names>Su Cheng</given-names>
                        </name>
                        <aff/>
                    </contrib>
                </contrib-group>
                <author-notes>
                    <fn fn-type="conflict">
                        <p>
                            <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                    </fn>
                </author-notes>
                <pub-date pub-type="epub">
                    <day>6</day>
                    <month>6</month>
                    <year>2022</year>
                </pub-date>
            </front-stub>
            <body>
                <p>Learning curves for Random Forest before and after applying DSTs (for three versions of datasets) are provided. We discussed briefly in the discussion section as well.</p>
            </body>
        </sub-article>
    </sub-article>
</article>
