<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.2 20190208//EN" "http://jats.nlm.nih.gov/publishing/1.2/JATS-journalpublishing1.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="1.2" xml:lang="en">
    <front>
        <journal-meta>
            <journal-id journal-id-type="pmc">F1000Research</journal-id>
            <journal-title-group>
                <journal-title>F1000Research</journal-title>
            </journal-title-group>
            <issn pub-type="epub">2046-1402</issn>
            <publisher>
                <publisher-name>F1000 Research Limited</publisher-name>
                <publisher-loc>London, UK</publisher-loc>
            </publisher>
        </journal-meta>
        <article-meta>
            <article-id pub-id-type="doi">10.12688/f1000research.140395.2</article-id>
            <article-categories>
                <subj-group subj-group-type="heading">
                    <subject>Research Article</subject>
                </subj-group>
                <subj-group>
                    <subject>Articles</subject>
                </subj-group>
            </article-categories>
            <title-group>
                <article-title>COVID-19 Vaccine: Predicting Vaccine Types and Assessing Mortality Risk Through Ensemble Learning Algorithms</article-title>
                <fn-group content-type="pub-status">
                    <fn>
                        <p>[version 2; peer review: 2 approved, 1 approved with reservations]</p>
                    </fn>
                </fn-group>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author" corresp="yes">
                    <name>
                        <surname>Monadhel</surname>
                        <given-names>Hind</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Formal Analysis</role>
                    <role content-type="http://credit.niso.org/">Funding Acquisition</role>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Software</role>
                    <role content-type="http://credit.niso.org/">Visualization</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Original Draft Preparation</role>
                    <uri content-type="orcid">https://orcid.org/0000-0002-9923-4979</uri>
                    <xref ref-type="corresp" rid="c1">a</xref>
                    <xref ref-type="aff" rid="a1">1</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Abbas</surname>
                        <given-names>Ayad R.</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Supervision</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <uri content-type="orcid">https://orcid.org/0000-0002-9797-421X</uri>
                    <xref ref-type="aff" rid="a2">2</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Mohammed</surname>
                        <given-names>Athraa Jasim</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Supervision</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a3">3</xref>
                </contrib>
                <aff id="a1">
                    <label>1</label>Computer Science, University of Technology - Iraq, Baghdad, Iraq</aff>
                <aff id="a2">
                    <label>2</label>Computer Science, University of Technology - Iraq, Baghdad, Iraq</aff>
                <aff id="a3">
                    <label>3</label>Computer Science, University of Technology - Iraq, Baghdad, Iraq</aff>
            </contrib-group>
            <author-notes>
                <corresp id="c1">
                    <label>a</label>
                    <email xlink:href="mailto:cs.20.38@grad.uotechnology.edu.iq">cs.20.38@grad.uotechnology.edu.iq</email>
                </corresp>
                <fn fn-type="conflict">
                    <p>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>23</day>
                <month>8</month>
                <year>2024</year>
            </pub-date>
            <pub-date pub-type="collection">
                <year>2023</year>
            </pub-date>
            <volume>12</volume>
            <elocation-id>1200</elocation-id>
            <history>
                <date date-type="accepted">
                    <day>17</day>
                    <month>6</month>
                    <year>2024</year>
                </date>
            </history>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2024 Monadhel H et al.</copyright-statement>
                <copyright-year>2024</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access article distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <self-uri content-type="pdf" xlink:href="https://f1000research.com/articles/12-1200/pdf"/>
            <abstract>
                <sec>
                    <title>Background</title>
                    <p>There is no doubt that vaccination is crucial for preventing the spread of diseases; however, not every vaccine is perfect or will work for everyone. The main objective of this work is to predict which vaccine will be most effective for a candidate without causing severe adverse reactions and to categorize a patient as potentially at high risk of death from the COVID-19 vaccine.</p>
                </sec>
                <sec>
                    <title>Methods</title>
                    <p>A comprehensive analysis was conducted using a dataset on COVID-19 vaccine adverse reactions, exploring binary and multiclass classification scenarios. Ensemble models, including Random Forest, Decision Tree, Light Gradient Boosting, and Extreme Gradient Boosting algorithms, were utilized to achieve accurate predictions. Class balancing techniques like SMOTE, TOMEK_LINK, and SMOTETOMEK were incorporated to enhance model performance.</p>
                </sec>
                <sec>
                    <title>Results</title>
                    <p>The study revealed that pre-existing conditions such as diabetes, hypertension, heart disease, history of allergies, prior vaccinations, other medications, age, and gender were crucial factors associated with poor outcomes. Moreover, using medical history, the ensemble learning classifiers achieved accuracy scores ranging from 75% to 87% in predicting the vaccine type and mortality possibility. The Random Forest model emerged as the best prediction model, while the implementation of the SMOTE and SMOTETOMEK methods generally improved model performance.</p>
                </sec>
                <sec>
                    <title>Conclusion</title>
                    <p>The random forest model emerges as the top recommendation for machine learning tasks that require high accuracy and resilience. Moreover, the findings highlight the critical role of medical history in optimizing vaccine outcomes and minimizing adverse reactions.</p>
                </sec>
            </abstract>
            <kwd-group kwd-group-type="author">
                <kwd>Classification algorithm</kwd>
                <kwd>COVID-19 Vaccine</kwd>
                <kwd>ensemble learning</kwd>
                <kwd>machine learning</kwd>
                <kwd>Sampling methods</kwd>
                <kwd>Side effects</kwd>
            </kwd-group>
            <funding-group>
                <funding-statement>The author(s) declared that no grants were involved in supporting this work.</funding-statement>
            </funding-group>
        </article-meta>
        <notes>
            <sec sec-type="version-changes">
                <label>Revised</label>
                <title>Amendments from Version 1</title>
                <p>All the reviewers' comments have been addressed, and a new section was added to clarify the algorithm and sampling method selection, as well as parameter tuning. Additionally, a new table and figure were included.</p>
            </sec>
        </notes>
    </front>
    <body>
        <def-list>
            <title>Abbreviations</title>
            <def-item>
                <term id="G4">COVID-19</term>
                <def>
                    <p>Coronavirus Disease 2019</p>
                </def>
            </def-item>
            <def-item>
                <term id="G6">DT</term>
                <def>
                    <p>Decision Trees</p>
                </def>
            </def-item>
            <def-item>
                <term id="G9">LGBM</term>
                <def>
                    <p>Light Gradient Boosting Machine</p>
                </def>
            </def-item>
            <def-item>
                <term id="G5">ML</term>
                <def>
                    <p>Machine Learning</p>
                </def>
            </def-item>
            <def-item>
                <term id="G3">mRNA</term>
                <def>
                    <p>messenger ribonucleic acid</p>
                </def>
            </def-item>
            <def-item>
                <term id="G7">RF</term>
                <def>
                    <p>Random Forests</p>
                </def>
            </def-item>
            <def-item>
                <term id="G2">SARS-CoV</term>
                <def>
                    <p>Severe Acute Respiratory Syndrome-associated coronavirus</p>
                </def>
            </def-item>
            <def-item>
                <term id="G10">SMOTE</term>
                <def>
                    <p>Synthetic Minority Oversampling Technique</p>
                </def>
            </def-item>
            <def-item>
                <term id="G1">VAERS</term>
                <def>
                    <p>Vaccine Adverse Event Reporting System</p>
                </def>
            </def-item>
            <def-item>
                <term id="G8">XGB</term>
                <def>
                    <p>extreme Gradient Boosting Machine</p>
                </def>
            </def-item>
        </def-list>
        <sec id="sec1" sec-type="intro">
            <title>Introduction</title>
            <p>From seven to 13 years of research and development (R&amp;D) and 1.8 million clinical trials to develop a vaccine in the past, we have transitioned to 10 to 18 months of R&amp;D and tens of thousands of clinical trials to start vaccinating against COVID-19 in 2021.
                <sup>
                    <xref ref-type="bibr" rid="ref1">1</xref>
                </sup>
            </p>
            <p>Vaccines are biologics that provide active adaptive immunity against particular diseases. A vaccine usually contains an agent that resembles the microorganism that causes the disease. It is generally made from killed or attenuated forms of the microorganism, its toxins, or one of its surface proteins. Administering a vaccine by injection, nasal spray, or orally stimulates the immune system to recognize and destroy foreign bodies.
                <sup>
                    <xref ref-type="bibr" rid="ref2">2</xref>
                </sup>
            </p>
            <p>As a result of the novel coronavirus's rapid dissemination and disease burden, pharmaceutical companies and researchers were forced to create vaccinations quickly using either novel or preexisting technologies.
                <sup>
                    <xref ref-type="bibr" rid="ref3">3</xref>
                </sup> There are several different types of vaccines, and the purpose of each type is to boost your immune system and prevent serious, life-threatening diseases from occurring.
                <sup>
                    <xref ref-type="bibr" rid="ref4">4</xref>
                </sup> The COVID-19 vaccines that have been approved employ a variety of mechanisms of action, including mRNA, DNA vaccines, viral vectors, protein subunits, and virus-inactivated vaccination techniques.
                <sup>
                    <xref ref-type="bibr" rid="ref5">5</xref>
                </sup> Three vaccines have been widely administered: Pfizer and Moderna (mRNA) vaccinations targeting the SARS-CoV-2 surface protein, and the Janssen (viral vector) vaccine, which employed pre-existing technology with an adenovirus vector to trigger an immune response and provide protection against further infection. As these vaccines were developed using various approaches, they differ in efficacy and storage conditions.
                <sup>
                    <xref ref-type="bibr" rid="ref6">6</xref>
                </sup>
            </p>
            <p>However, no vaccine is entirely free from complications or adverse reactions. Any vaccination can have early adverse reactions, including local ones like pain, swelling, and redness, as well as systemic ones like headache, chills, nausea, fatigue, myalgia, and fever.
                <sup>
                    <xref ref-type="bibr" rid="ref7">7</xref>
                </sup> Also, several existing health conditions or symptoms the candidate already has can lead to severe adverse reactions after taking the COVID-19 vaccine. The candidate's death could be the worst-case scenario. As a result, it's critical to know about the candidate's previous medical history.
                <sup>
                    <xref ref-type="bibr" rid="ref8">8</xref>
                </sup>
            </p>
            <p>This paper delves into an in-depth analysis of adverse effects associated with COVID-19 vaccination using data mining techniques to predict the most appropriate vaccine for individual candidates and identify patients at high risk of mortality from COVID-19 vaccination. To accomplish these pivotal objectives, an extensive analysis was conducted using a comprehensive COVID-19 vaccine adverse reaction dataset, shedding light on crucial factors influencing vaccine outcomes.</p>
            <p>This work's main contribution can be summarized as follows:
                <list list-type="order">
                    <list-item>
                        <label>1.</label>
                        <p>Identify the most important features of an individual's medical history that could contribute to adverse reactions to vaccination.</p>
                    </list-item>
                    <list-item>
                        <label>2.</label>
                        <p>Identify the most important features that contributed to the death of the candidate based on his or her medical history.</p>
                    </list-item>
                    <list-item>
                        <label>3.</label>
                        <p>Address the challenge of the imbalanced dataset by employing sampling methods to effectively handle the imbalance and improve the reliability of the analysis.</p>
                    </list-item>
                    <list-item>
                        <label>4.</label>
                        <p>Develop a machine learning (ML) model capable of predicting and classifying the most suitable vaccine types for each candidate, thus helping to prevent severe consequences and ensure optimal vaccination outcomes.</p>
                    </list-item>
                </list>
            </p>
            <p>The rest of this paper is organized as follows: In the next section, we present a brief review of the literature on various related works. Section 3 provides a detailed explanation of our methodology and dataset. Section 4 discusses the study findings, while Section 5 covers the strengths and limitations. Section 6 presents the conclusions, and Section 7 outlines the future work.</p>
        </sec>
        <sec id="sec2">
            <title>Literature review</title>
            <p>Due to the rapid advancement of technology, there are numerous opportunities and possibilities for ML in healthcare.
                <sup>
                    <xref ref-type="bibr" rid="ref9">9</xref>
                </sup> Classification is the most well-known machine-learning technique in medical applications because it is similar to everyday problems. A classification algorithm builds a model based on training data and then applies it to test data to obtain a prediction.
                <sup>
                    <xref ref-type="bibr" rid="ref10">10</xref>
                </sup>
            </p>
            <p>Interestingly, some studies have utilized machine learning applications to predict side effects, reactogenicity, and morbidity incidence following COVID-19 vaccinations. In research by Sujatha 
                <italic toggle="yes">et al</italic>.,
                <sup>
                    <xref ref-type="bibr" rid="ref8">8</xref>
                </sup> the authors developed a model to predict whether a candidate is suitable for COVID-19 vaccination. Four machine learning approaches, namely Logistic Regression, AdaBoost, Random Forest, and Decision Tree were employed in the task of prediction. The authors found that AdaBoost was the classifier with the best performance, achieving an accuracy of 0.98. The number of symptoms has been restricted to five for the sake of proper implementation. While this limitation streamlines the analysis process, it may overlook potential rare symptoms or nuances in symptomatology, impacting the comprehensiveness of the study's conclusions.</p>
            <p>In research by Hatmal 
                <italic toggle="yes">et al.,</italic>
                <sup>
                    <xref ref-type="bibr" rid="ref11">11</xref>
                </sup> the authors used machine learning and ensemble methods to predict the severity of side effects, defined as none, mild, moderate, or severe. The analysis revealed that random forest and XGBoost achieved the highest accuracy (0.80 and 0.79, respectively) and Cohen&#x2019;s &#x03ba; values (0.71 and 0.70, respectively). Statistical data analysis revealed that side effects significantly varied based on vaccine type. According to this study, the COVID-19 vaccines approved by the CDC are safe, and vaccination instills a sense of safety in people. However, severe cases may require additional medical care or even hospitalization. The dataset suffered from uneven gender and profession representation, potential result misclassification, and reliance on a self-reported online survey.</p>
            <p>In research by Lian 
                <italic toggle="yes">et al.,</italic>
                <sup>
                    <xref ref-type="bibr" rid="ref12">12</xref>
                </sup> the goal was to collect and analyze tweets about the COVID-19 vaccination to find posts about personal experiences with COVID-19 vaccine adverse events. The authors found that the ensemble model-based RF achieves the best performance with an F1 score of 0.926, an accuracy of 0.908, and a recall of 0.946. The named entity recognition (NER) model achieved an F1 score of 0.770 for detecting adverse events using the conditional random fields (CRF) algorithm. Also, the results show that the three COVID-19 vaccines' (Pfizer, Moderna, and Johnson &amp; Johnson) most common side effects are soreness to touch, fatigue, and headache. Notably, the majority of the participants were young. Additionally, the survey was conducted in a single language, which may present challenges for individuals with Limited English Proficiency (LEP). This linguistic limitation could impact the inclusivity and representation of diverse perspectives in the study.</p>
        </sec>
        <sec id="sec3" sec-type="methods">
            <title>Methods</title>
            <p>An overview of the general methodology for developing machine learning models is visualized in 
                <xref ref-type="fig" rid="f1">Figure 1</xref>. In this study, we focus on predicting which vaccine will be most effective for a candidate without causing severe adverse reactions (output) based on several factors (input) and handling the imbalanced data that falls under the Pre-processing step where the data preparation process takes place.</p>
            <fig fig-type="figure" id="f1" orientation="portrait" position="float">
                <label>Figure 1. </label>
                <caption>
                    <title>Prediction methodology architecture.</title>
                </caption>
                <graphic id="gr1" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/167975/daec2e63-3066-4001-8088-f41ca694dec9_figure1.gif"/>
            </fig>
            <sec id="sec4">
                <title>Dataset</title>
                <p>The raw data of individuals who received vaccinations and reported adverse reactions was obtained from the VAERS.
                    <sup>
                        <xref ref-type="bibr" rid="ref13">13</xref>
                    </sup> This dataset contains vaccination information for individuals vaccinated against a variety of diseases including COVID-19, polio, tetanus, and influenza. However, our current study omitted any non-SARS-CoV-2 (COVID-19) vaccination information. Therefore, the dataset being used consists of 49,810 individuals. This dataset has various attributes of individuals&#x2019; information such as age, gender, current illness, medical history, allergic history, type of vaccine, life-threatening illness, symptoms after vaccinations, etc. Some of these attributes are textual (e.g., medical history, symptoms text, etc.), while others are numerical (such as age, number of doses, etc.). A description of selected attributes of the VAERS dataset is given in 
                    <xref ref-type="table" rid="T1">Table 1</xref>.</p>
                <table-wrap id="T1" orientation="portrait" position="float">
                    <label>Table 1. </label>
                    <caption>
                        <title>Description of some attributes in the VAERS dataset.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">Number</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Features</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Description</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Range</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Mean</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Standard Deviation</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">1</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">AGE_YRS (AS)</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Age in years</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">16-109</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">57.13</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">18.43</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">2</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">SEX (S)</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Sex information: (0: Female, 1: Male)</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0-1</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">56.12</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">229.37</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">3</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">OTHER_MEDS (OM)</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Other medications currently being taken</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0-1</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.46</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.49</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">4</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">CUR_ILL (CL)</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Illnesses at the time of vaccination</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0-1</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.26</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.43</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">5</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">PRIOR_VAX (PV)</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Any prior vaccination information</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0-1</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.02</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.14</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">6</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">VAX_NAME (VN)</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Vaccination name</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0-2</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.46</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.49</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">7</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Medical History (MH)</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Pre-existing chronic or long-standing health conditions</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0-1</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.46</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.49</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">8</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">ALLERGIES (A)</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Any allergy history</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0-1</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.25</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.43</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">9</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Died (D)</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Died</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0-1</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.14</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.35</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
            </sec>
            <sec id="sec5">
                <title>Preprocessing</title>
                <p>The quality of the raw data used to perform any analysis heavily influences its outcome. Therefore, the preprocessing and exploratory analysis of data become the most important parts of any data-driven investigation. The preprocessing of a dataset involves examining the data for missing values, irrelevant values, duplicates, etc., whereas exploratory data analysis (EDA) assists in understanding data by visualizing it. It has been noticed that the dataset contains many missing and irrelevant values.</p>
                <p>Any COVID-19 vaccine types that were not specified were removed, and only two types of values in the sex field were considered: &#x201c;M&#x201d; as male and &#x201c;F&#x201d; as female. Unknown values were excluded. In the died field, &#x2018;Y&#x2019; was considered yes, and the rest were considered &#x2018;no&#x2019;; in the &#x2018;prior vaccine&#x2019; field, &#x2018;yes&#x2019; was considered yes, and the rest were considered &#x2018;no&#x2019;. The analysis of allergic history included considering mentioned allergic effects as positive cases and considering &#x2018;null&#x2019;, &#x2018;none&#x2019;, &#x2018;NA&#x2019;, and other negatively mentioned text as negative cases. The History column in the dataset contained written records of coexisting conditions, requiring the extraction of all of the patient's medical history separately. To better understand the patient's medical history, information about pre-existing chronic and non-chronic diseases, such as chronic obstructive pulmonary disease, hypertension, diabetes, and kidney disease, was extracted. All missing values (i.e., empty, null) were excluded from this field, and spelling/grammar mistakes were fixed.</p>
                <p>In the feature extraction step, most of the important features in the acquired dataset are presented as textual data. However, in order to analyze them, they must be separated into separate entities. As a result, string matching was used to convert all text data into attributes. The correlation plot (
                    <xref ref-type="fig" rid="f2">Figure 2</xref>) did not demonstrate a significant relationship between various attributes and vaccine types. Yet, previous studies revealed a direct correlation between vaccine adverse reactions and medical and allergic histories. Therefore, the number of unique entries for the diseases in the patients' medical histories was counted. Diseases with more than 300 counts in patients' medical histories were considered attributes, while the rest were ignored due to the large dataset and the computational burden associated with each individual disease. This study, therefore, considered 21 diseases as attributes extracted from the patient&#x2019;s medical history, namely diabetes mellitus, thyroid disease, various types of pain, obesity, migraine, kidney disease, hypertension, hyperlipidemia, high cholesterol, heart disease, Gastroesophageal Reflux Disease (GERD), depression, dementia, positive history of COVID-19, Chronic Obstructive Pulmonary Disease (COPD), cancer, atrial fibrillation, asthma, arthritis, anxiety, and anemia. Using the VAERS ID, these files have been merged into one file after identifying and extracting features. The analyzed dataset has 28 different features and 49,810 samples. The data was encoded using a one-hot encoding technique.</p>
                <fig fig-type="figure" id="f2" orientation="portrait" position="float">
                    <label>Figure 2. </label>
                    <caption>
                        <title>Correlation plot between different features of the VAERS dataset.</title>
                    </caption>
                    <graphic id="gr2" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/167975/daec2e63-3066-4001-8088-f41ca694dec9_figure2.gif"/>
                </fig>
            </sec>
            <sec id="sec6">
                <title>Data-Sampling Algorithms</title>
                <p>In this study, only three methods of handling imbalanced data are used. In the first place, no changes are made to the data. Normally, it is divided into training and testing data at a ratio of 8 to 2. This first technique is referred to as &#x201c;Normal&#x201d; in this study. Next, experiments are conducted using well-known techniques for balancing imbalanced datasets: SMOTE, Tomek-links, and SMOTETOMEK, which combines SMOTE and Tomek links.
                    <sup>
                        <xref ref-type="bibr" rid="ref14">14</xref>
                    </sup> As with the previous experiment, the dataset is divided into training and testing data at a ratio of 8 to 2. This experiment aims to handle imbalanced data and further improve the performance of machine learning classification models, especially in the multiclass classification scenario. These sampling methods were selected considering data nature, imbalance ratio, algorithm compatibility, and analysis goals.</p>
            </sec>
            <sec id="sec7">
                <title>Description of Ensemble Methods </title>
                <p>To predict which vaccine will be most effective for a candidate without causing severe adverse reactions (output) based on several factors (input), different machine-learning algorithms were used to build the proposed model. These approaches were selected due to their accuracy, robustness, efficiency, scalability, and ability to handle large, high-dimensional datasets while reducing overfitting.</p>
                <p>Random Forest (RF)</p>
                <p>RF is a multipurpose data mining approach for classification. It is based on decision trees that operate as an ensemble, an approach of combining multiple classifiers to identify problems and enhance accuracy. Each tree independently predicts a classification and votes for the relevant class, and the majority of votes decides the model&#x2019;s prediction. It can handle large datasets with high dimensionality; it also improves the accuracy of the model and eliminates the overfitting problem.
                    <sup>
                        <xref ref-type="bibr" rid="ref15">15</xref>
                    </sup>
                </p>
                <p>Decision Tree (DT)</p>
                <p>A DT is a supervised learning technique that can be used for classification and regression problems; however, it is most commonly used to resolve classification issues. In this tree-organized classifier, the internal nodes represent datasets, branches represent decision rules, and each leaf node represents the outcome. A DT has two nodes: the decision node and the leaf node. The leaf nodes are the result of such decisions and they do not have any extra branches, but decision nodes are frequently used to settle any decision and have several branches. Based on the features of the dataset, decisions or tests are made.
                    <sup>
                        <xref ref-type="bibr" rid="ref16">16</xref>
                    </sup>
                </p>
                <p>Extreme Gradient Boosting (XGB)</p>
                <p>XGBoost is an ensemble learning method combining multiple weak models' predictions to generate a stronger prediction. In the beginning, XGB fits the data to a weak classifier. Afterward, the data is fitted to another weak classifier to increase accuracy without affecting the current model. In the same way, the process continues until the best accuracy is achieved.
                    <sup>
                        <xref ref-type="bibr" rid="ref17">17</xref>
                    </sup> Furthermore, XGBoost supports parallel processing, making it possible to train models on large datasets in a reasonable period of time.</p>
                <p>Light Gradient Boosting Machine (LGBM)</p>
                <p>LGBM is an open-source gradient boosting decision tree (GBDT) algorithm, built on a tree-based learning framework and designed by Microsoft Research Asia. This framework grows trees vertically (leaf-wise) rather than horizontally (level-wise) as other tree-based frameworks do. Therefore, it can reduce the loss more efficiently and handle huge datasets with less computational complexity due to its lightweight design.
                    <sup>
                        <xref ref-type="bibr" rid="ref18">18</xref>
                    </sup>
                </p>
            </sec>
            <sec id="sec8">
                <title>Model performance evaluation </title>
                <p>Macro average and Weighted Average are used to calculate the performance of the four classifiers used for learning.</p>
                <p>In general, a confusion matrix is a 2 &#x00d7; 2 matrix, where the rows represent the instances in the actual class and the columns represent the instances in the predicted class. It results in four possible outcomes: TP, FP, TN, and FN.</p>
                <fig fig-type="figure" id="f3" orientation="portrait" position="float">
                    <label>Figure 3. </label>
                    <caption>
                        <title>Confusion matrix examples.</title>
                        <p>(a) Binary classification confusion matrix. (b) Multiclass classification confusion matrix.
                            <sup>
                                <xref ref-type="bibr" rid="ref19">19</xref>
                            </sup>
                        </p>
                    </caption>
                    <graphic id="gr3" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/167975/daec2e63-3066-4001-8088-f41ca694dec9_figure3.gif"/>
                </fig>
                <p>Using the above outcomes, we can check whether the predictions are correct.
                    <sup>
                        <xref ref-type="bibr" rid="ref16">16</xref>
                    </sup>
                    <sup>,</sup>
                    <sup>
                        <xref ref-type="bibr" rid="ref20">20</xref>
                    </sup>
                    <sup>,</sup>
                    <sup>
                        <xref ref-type="bibr" rid="ref21">21</xref>
                    </sup>
                    <list list-type="bullet">
                        <list-item>
                            <label>&#x2022;</label>
                            <p>
                                <bold>Accuracy:</bold> This term tells us how many classifications were correct out of all classifications.
                                <disp-formula id="e4">
                                    <mml:math display="block">
                                        <mml:mtext>Accuracy</mml:mtext>
                                        <mml:mo>=</mml:mo>
                                        <mml:mfrac>
                                            <mml:mrow>
                                                <mml:mi>TP</mml:mi>
                                                <mml:mo>+</mml:mo>
                                                <mml:mi>TN</mml:mi>
                                            </mml:mrow>
                                            <mml:mrow>
                                                <mml:mi>TP</mml:mi>
                                                <mml:mo>+</mml:mo>
                                                <mml:mi>TN</mml:mi>
                                                <mml:mo>+</mml:mo>
                                                <mml:mi>FP</mml:mi>
                                                <mml:mo>+</mml:mo>
                                                <mml:mi>FN</mml:mi>
                                            </mml:mrow>
                                        </mml:mfrac>
                                    </mml:math>
                                </disp-formula>
                            </p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>
                                <bold>Precision:</bold> A model's precision tells us how reliable its predictions are.
                                <disp-formula id="e1">
                                    <mml:math display="block">
                                        <mml:mtext>Precision</mml:mtext>
                                        <mml:mo>=</mml:mo>
                                        <mml:mfrac>
                                            <mml:mi>TP</mml:mi>
                                            <mml:mrow>
                                                <mml:mi>TP</mml:mi>
                                                <mml:mo>+</mml:mo>
                                                <mml:mi>FP</mml:mi>
                                            </mml:mrow>
                                        </mml:mfrac>
                                    </mml:math>
                                </disp-formula>
                            </p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>
                                <bold>Recall:</bold> The model's ability to detect the instances of a class.
                                <disp-formula id="e2">
                                    <mml:math display="block">
                                        <mml:mtext mathvariant="normal">Recall</mml:mtext>
                                        <mml:mo>=</mml:mo>
                                        <mml:mfrac>
                                            <mml:mi mathvariant="normal">TP</mml:mi>
                                            <mml:mrow>
                                                <mml:mi mathvariant="normal">TP</mml:mi>
                                                <mml:mo>+</mml:mo>
                                                <mml:mi mathvariant="normal">FN</mml:mi>
                                            </mml:mrow>
                                        </mml:mfrac>
                                    </mml:math>
                                </disp-formula>
                            </p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>
                                <bold>F-score:</bold> It will give us a harmonic mean of precision and recall.
                                <disp-formula id="e3">
                                    <mml:math display="block">
                                        <mml:mspace width="0.25em"/>
                                        <mml:mi mathvariant="normal">F</mml:mi>
                                        <mml:mo>&#x2212;</mml:mo>
                                        <mml:mtext>score</mml:mtext>
                                        <mml:mo>=</mml:mo>
                                        <mml:mn>2</mml:mn>
                                        <mml:mo>&#x00b7;</mml:mo>
                                        <mml:mfenced close=")" open="(">
                                            <mml:mfrac>
                                                <mml:mrow>
                                                    <mml:mtext mathvariant="normal">precision</mml:mtext>
                                                    <mml:mo>&#x00b7;</mml:mo>
                                                    <mml:mtext mathvariant="normal">recall</mml:mtext>
                                                </mml:mrow>
                                                <mml:mrow>
                                                    <mml:mtext mathvariant="normal">precision</mml:mtext>
                                                    <mml:mo>+</mml:mo>
                                                    <mml:mtext mathvariant="normal">recall</mml:mtext>
                                                </mml:mrow>
                                            </mml:mfrac>
                                        </mml:mfenced>
                                    </mml:math>
                                </disp-formula>
                            </p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>ROC Curve &amp; AUC</p>
                        </list-item>
                    </list>
                </p>
                <p>ROC curves show the performance of the classification model across all classification thresholds. In a ROC curve, the TP rate and FP rate are plotted at each threshold of classification. &#x201c;AUC&#x201d; stands for &#x201c;Area Under the ROC Curve&#x201d;. It measures a classifier's ability to distinguish between classes. In general, the higher the AUC value, the better the classifier is at distinguishing positive from negative classes.
                    <sup>
                        <xref ref-type="bibr" rid="ref22">22</xref>
                    </sup>
                    <sup>,</sup>
                    <sup>
                        <xref ref-type="bibr" rid="ref23">23</xref>
                    </sup>
                </p>
            </sec>
            <sec id="sec9">
                <title>Algorithms Hyperparameter Tuning Using Grid Search</title>
                <p>Hyperparameter tuning is a crucial step in the data mining model development process, involving the refinement of hyperparameters within a data mining algorithm to uncover the optimal combination that enhances classifier performance. The Grid Search approach is a widely recognized and effective method for hyperparameter tuning.
                    <sup>
                        <xref ref-type="bibr" rid="ref24">24</xref>
                    </sup> In the context of this paper, Grid Search was employed using the GridSearchCV object from scikit-learn to thoroughly explore and identify the hyperparameter set that consistently produces the most favorable results. Known for its systematic and methodical approach to hyperparameter tuning, Grid Search operates by specifying a set of hyperparameters and their potential values, creating a grid of all possible combinations, and assessing the model's performance for each. This method exhaustively searches through the grid, identifying the hyperparameters that consistently yield the best results, and fine-tunes the model for optimal performance.</p>
                <p>A comprehensive overview of the default parameters for the data mining classifiers is provided in the appendix. Additionally, it details the parameters specifically assigned to each classifier for the purpose of randomized parameter optimization to enhance performance.</p>
            </sec>
        </sec>
        <sec id="sec10" sec-type="results|discussion">
            <title>Results and Discussion</title>
            <p>The majority of the individuals, 74% in total, were identified as female. It was estimated that the average age of the individuals was about 53 years old and that the average age of those who died was about 72 years. Thus, there is a noticeable age difference between the two groups. In terms of the reported chronic diseases, chronic hypertension emerged as the most prevalent at 13%, followed by asthma at 12%. Kidney issues and anemia were reported in approximately 2% of the cases. Understanding the prevalence of pre-existing conditions is crucial in assessing the potential impact of the vaccine on individuals with specific health conditions. Additionally, a history of allergies, including various types of allergic events not limited to anaphylaxis, was frequently observed, representing approximately 20% of the total cases and close to 25% of the fatality cases (
                <xref ref-type="fig" rid="f5">Figure 5</xref>). According to reports, 10.7% of individuals who have received vaccinations have died. From 
                <xref ref-type="fig" rid="f4">Figure 4</xref>, one can clearly observe that the majority of COVID-19 vaccination case fatalities are concentrated among individuals aged 70 to 89, regardless of gender, highlighting this age group as particularly vulnerable to severe outcomes. Additionally, a significantly higher mortality rate is observed among males compared to females between the ages of 60 and 99, indicating that males within this age range are more susceptible to severe adverse effects leading to fatalities from the COVID-19 vaccination. These findings underscore the heightened vulnerability of elderly individuals, particularly those in their 70s and 80s, and emphasize the increased risk of severe outcomes among older males.</p>
            <fig fig-type="figure" id="f4" orientation="portrait" position="float">
                <label>Figure 4. </label>
                <caption>
                    <title>Case Fatality Number by Age Band and sex.</title>
                </caption>
                <graphic id="gr4" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/167975/daec2e63-3066-4001-8088-f41ca694dec9_figure4.gif"/>
            </fig>
            <fig fig-type="figure" id="f5" orientation="portrait" position="float">
                <label>Figure 5. </label>
                <caption>
                    <title>Reported chronic diseases.</title>
                </caption>
                <graphic id="gr5" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/167975/daec2e63-3066-4001-8088-f41ca694dec9_figure5.gif"/>
            </fig>
            <fig fig-type="figure" id="f6" orientation="portrait" position="float">
                <label>Figure 6. </label>
                <caption>
                    <title>Class label counts before and after applying the various data-sampling techniques for the vaccine type dataset.</title>
                </caption>
                <graphic id="gr6" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/167975/daec2e63-3066-4001-8088-f41ca694dec9_figure6.gif"/>
            </fig>
            <p>The most frequently reported local and systemic side effects after each of the three available COVID-19 vaccinations include headache, pain at the injection site, rash, chills, fatigue, fever, body pain, and vertigo. These symptoms collectively accounted for nearly 10% of all observed cases, typically presenting as mild and transient, reflecting the body's natural immune response to vaccination. Additionally, several other adverse reactions were commonly reported, including various types of pain, nausea, rash at the injection site, muscle aches (myalgia), and shortness of breath. The incidence of these reactions ranged from 4.8% to 9.4%, while the occurrence of other adverse reactions was less than 5% (
                <xref ref-type="fig" rid="f7">Figure 7</xref>).</p>
            <fig fig-type="figure" id="f7" orientation="portrait" position="float">
                <label>Figure 7. </label>
                <caption>
                    <title>Top 29 frequently reported symptoms associated with the Moderna, Pfizer, and Janssen vaccines.</title>
                </caption>
                <graphic id="gr7" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/167975/daec2e63-3066-4001-8088-f41ca694dec9_figure7.gif"/>
            </fig>
            <p>Extensive experiments have been conducted to predict three significant events in COVID-19 vaccination according to different scenarios. The most relevant ML models for classifying vaccines in each scenario include RF, DT, XGB, and LGBM. We used 80% training data and 20% test data to evaluate the effectiveness of different ML-based approaches. As was previously mentioned, this dataset was unbalanced; therefore, we employed sampling strategies to address this problem. A number of well-known performance measures were used to assess the results of classification, including accuracy, precision, recall, F1 score, and ROC-AUC.</p>
            <p>Our results are presented in two parts each with two scenarios: 
                <italic toggle="yes">(a)</italic> multiclass classification with sampling, 
                <italic toggle="yes">(b)</italic> binary classification with sampling, and 
                <italic toggle="yes">(c)</italic> a comparison of the best model for each part.</p>
            <sec id="sec11">
                <title>Multiclass classification results: based upon both medical history and vaccine type</title>
                <p>This section presents the results of the multiclass classification for the COVID-19 vaccine prediction problem, along with the analysis and the discussion. Firstly, we considered the patient&#x2019;s medical history as independent features and the vaccine type (value 0 means Moderna, 1 means Pfizer, and 2 means Janssen) as the dependent feature that depends on the independent features. Then each of the three data-sampling procedures&#x2014;SMOTE, TOMEK-LINKS, and SMOTETOMEK&#x2014;was applied separately. 
                    <xref ref-type="fig" rid="f6">Figure 6</xref> illustrates the effects of applying various data-balancing techniques.</p>
                <p>The performance parameters for each model on the test dataset are presented in 
                    <xref ref-type="table" rid="T2">Table 2</xref>. As a result, the following observations have been noted:
                    <list list-type="bullet">
                        <list-item>
                            <label>&#x2022;</label>
                            <p>The testing accuracy values range from approximately 75% to 81% across different models and methods. The Random Forest (RF) models with Normal, TOMEK-LINKS, and SMOTETOMEK methods achieved the highest testing accuracy of around 80.8%, while the XGBoost (XGB) and LightGBM (LGBM) models with Normal, SMOTE, and TOMEK-LINKS methods achieved slightly lower testing accuracy, ranging from 75.2% to 76.2%.</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>The training accuracy values are relatively close to the testing accuracy values, indicating that the models are not overfitting to the training data. The training accuracy values range from approximately 76.9% to 81.2%.</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Macro Precision, Recall, and F1 Scores: These metrics provide insights into the models' performance for each class, and the macro averaging considers all classes equally. The RF and DT models consistently show similar precision, recall, and F1 scores across different methods, ranging from around 78.9% to 81.6%. The XGB and LGBM models tend to have slightly lower scores, ranging from approximately 70.5% to 74.6%. The RF models generally achieve the highest scores, while the XGB and LGBM models have the lowest scores.</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>The AUC (Area Under the Curve) values represent the performance of the models in terms of their ability to rank samples correctly across all classes. The AUC values range from approximately 78% to 85%. The RF models with SMOTE and SMOTETOMEK methods achieved the highest AUC values of around 85%, indicating better overall performance in distinguishing between different vaccine types.</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Overall, the RF models consistently perform well across different methods, with relatively higher accuracy, precision, recall, F1 scores, and AUC values. The XGB and LGBM models have lower performance compared to RF and DT models. The SMOTE and SMOTETOMEK methods generally improve the performance of the models, as seen in higher AUC values compared to the Normal and TOMEK-LINKS methods. These models achieve relatively high testing accuracy, balanced precision, recall, and F1 scores, as well as high AUC values.</p>
                        </list-item>
                    </list>
                </p>
                <table-wrap id="T2" orientation="portrait" position="float">
                    <label>Table 2. </label>
                    <caption>
                        <title>Performance measures of multiclass classification.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">Method</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Model</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Testing Accuracy</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Training Accuracy</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Macro Precision</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Macro Recall</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Macro F1 scores</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">AUC</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="4" valign="top">Normal</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">RF</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.80823</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.81208</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.81569</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.78993</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.79653</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.84</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">DT</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.80823</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.81210</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.81569</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.78993</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.79653</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.84</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">XGB</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.76174</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.76480</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.78625</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.72301</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.74137</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.80</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">LGBM</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.75218</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.75926</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.77682</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.70460</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.72229</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.78</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="4" valign="top">SMOTE</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">RF</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.80317</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.80393</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.80226</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.80057</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.79300</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.85</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">DT</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.80374</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.80397</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.80180</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.80102</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.79356</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.85</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">XGB</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.75740</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.75748</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.74593</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.75048</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.74586</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.81</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">LGBM</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.74953</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.75284</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.73641</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.74317</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.73613</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.80</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="4" valign="top">TOMEK-LINKS</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">RF</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.80823</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.81208</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.81569</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.78993</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.79653</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.84</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">DT</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.80823</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.81210</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.81569</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.78993</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.79653</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.84</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">XGB</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.76174</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.76480</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.78625</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.72301</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.74137</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.80</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">LGBM</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.75218</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.75926</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.77682</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.70460</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.72229</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.78</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="4" valign="top">SMOTETOMEK</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">RF</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.80358</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.80451</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.80189</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.80089</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.79341</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.85</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">DT</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.80374</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.80451</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.80180</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.80102</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.79356</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.85</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">XGB</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.75901</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.75789</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.74595</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.75350</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.74740</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.82</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">LGBM</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.75708</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.76176</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <styled-content style="#212121" style-type="color">0.74552</styled-content>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <styled-content style="#212121" style-type="color">0.75530</styled-content>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <styled-content style="#212121" style-type="color">0.74652</styled-content>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.82</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
                <p>ROC curves have been used to further analyze the predictive capability of these developed models, which are shown in 
                    <xref ref-type="fig" rid="f8">Figure 8</xref>. The RF and DT models prove their effectiveness. Taking AUC into account, all developed models perform satisfactorily.</p>
                <fig fig-type="figure" id="f8" orientation="portrait" position="float">
                    <label>Figure 8. </label>
                    <caption>
                        <title>ROC curves for COVID-19 vaccine type multiclass classification.</title>
                    </caption>
                    <graphic id="gr8" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/167975/daec2e63-3066-4001-8088-f41ca694dec9_figure8.gif"/>
                </fig>
            </sec>
            <sec id="sec12">
                <title>Binary classification results</title>
                <p>In our model&#x2019;s analysis, firstly, we considered the patient&#x2019;s medical history as the independent features, and the vaccine type (value 0 means Moderna and value 1 means Pfizer) and the patient death (value 0 means alive, and value 1 means died) as dependent features. We trained and evaluated our models using test data by measuring accuracy, precision, recall, and AUC.</p>
                <p>Scenario 1: Based upon both medical history and vaccine type</p>
                <p>The performance parameters for each model on the test dataset are presented in 
                    <xref ref-type="table" rid="T3">Table 3</xref>. As a result, the following observations have been noted:
                    <list list-type="order">
                        <list-item>
                            <label>1.</label>
                            <p>RF achieved high testing accuracy (0.87091) and training accuracy (0.87439), indicating good generalization and low overfitting. It demonstrated high precision (0.87974), recall (0.87091), and F1 score (0.87424), suggesting a balanced performance between identifying positive and negative instances. The AUC (0.93) indicates a high discriminatory power of the model. The precision value for both RF and DT was reported as 0.87. XGB and LGBM also show comparable precision values of 0.86 and 0.84, respectively.</p>
                        </list-item>
                        <list-item>
                            <label>2.</label>
                            <p>DT achieved similar testing accuracy (0.86975) and training accuracy (0.87439) as RF. It showed slightly lower precision (0.8779), recall (0.86975), and F1 score (0.8728) compared to RF. The AUC (0.93) suggests a good ability to distinguish between positive and negative instances.</p>
                        </list-item>
                        <list-item>
                            <label>3.</label>
                            <p>XGB achieved a slightly lower testing accuracy (0.85905) and training accuracy (0.86122) compared to RF and DT. It demonstrated comparable precision (0.86031), recall (0.85905), and F1 score (0.8596) to the testing accuracy, indicating a balanced performance. The AUC (0.91) suggests a reasonably good ability to discriminate between positive and negative instances.</p>
                        </list-item>
                        <list-item>
                            <label>4.</label>
                            <p>LGBM showed the lowest testing accuracy (0.84953) and training accuracy (0.85038) among the models. It had slightly lower precision (0.84771), recall (0.84953), and F1 score (0.84857) compared to the other models. The AUC (0.89) suggests a good ability to distinguish between positive and negative instances, although it is lower than RF and DT.</p>
                        </list-item>
                        <list-item>
                            <label>5.</label>
                            <p>The RF and DT models with vaccine-type target consistently achieved the highest accuracy, recall, precision, F1 score, and AUC, with RF outperforming all others. XGB and LGBM models had slightly lower performance metrics but still maintained reasonable accuracy and AUC.</p>
                        </list-item>
                        <list-item>
                            <label>6.</label>
                            <p>Thus, the experimental analysis indicates that the RF model is the most suitable for detecting vaccine type compared to the other models.</p>
                        </list-item>
                    </list>
                </p>
                <table-wrap id="T3" orientation="portrait" position="float">
                    <label>Table 3. </label>
                    <caption>
                        <title>Experimental performance of the models in Scenario 1 with the binary vaccine type dataset.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">Method</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Model</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Testing Accuracy</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Training Accuracy</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Precision</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Recall</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">F1 scores</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">AUC</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="4" valign="top">Normal</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">RF</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.87091</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.87439</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.87974</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.87091</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.87424</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.93</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">DT</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.86975</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.87439</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.8779</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.86975</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.8728</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.93</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">XGB</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.85905</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.86122</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.86031</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.85905</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.8596</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.91</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">LGBM</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.84953</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.85038</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.84771</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.84953</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.84857</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.89</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
                <p>ROC curves have been used to further analyze the predictive capability of these models, which are shown in 
                    <xref ref-type="fig" rid="f9">Figure 9</xref>. The RF and DT models prove their effectiveness. Taking AUC into account, all developed models perform satisfactorily.</p>
                <fig fig-type="figure" id="f9" orientation="portrait" position="float">
                    <label>Figure 9. </label>
                    <caption>
                        <title>Scenario 1: ROC Curve for binary vaccine type dataset.</title>
                    </caption>
                    <graphic id="gr9" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/167975/daec2e63-3066-4001-8088-f41ca694dec9_figure9.gif"/>
                </fig>
                <p>Scenario 2: Based upon both medical history and death</p>
                <p>Experiments were also conducted on the patient&#x2019;s death dataset, in the same way as for the vaccine-type dataset. 
                    <xref ref-type="fig" rid="f10">Figure 10</xref> demonstrates the effect of applying various data-sampling methods. The performance parameters for each model on the test dataset are presented in 
                    <xref ref-type="table" rid="T4">Table 4</xref>. As a result, the following observations have been noted:
                    <list list-type="order">
                        <list-item>
                            <label>1.</label>
                            <p>The testing accuracy values range from approximately 79.9% to 85.7%, depending on the model and method used. The RF and XGB models consistently achieve higher testing accuracy compared to DT and LGBM models. Among the methods, TOMEK-LINKS and SMOTETOMEK methods tend to show slightly lower testing accuracy compared to Normal and SMOTE methods.</p>
                        </list-item>
                        <list-item>
                            <label>2.</label>
                            <p>The training accuracy values are relatively high, ranging from approximately 87% to 95.2%. However, there is a notable difference between the training accuracy and testing accuracy values, suggesting potential overfitting issues, especially for the RF models.</p>
                        </list-item>
                        <list-item>
                            <label>3.</label>
                            <p>Precision, Recall, and F1 scores: The precision, recall, and F1 scores provide insights into the models' performance for predicting the positive class (death possibility). The RF models consistently achieve higher precision, recall, and F1 scores compared to DT, XGB, and LGBM models. Among the methods, TOMEK-LINKS and SMOTETOMEK methods tend to show slightly lower precision, recall, and F1 scores compared to Normal and SMOTE methods.</p>
                        </list-item>
                        <list-item>
                            <label>4.</label>
                            <p>The AUC (Area Under the Curve) values represent the models' ability to rank samples correctly and discriminate between positive and negative classes. The AUC values range from approximately 66% to 86%. The RF and XGB models consistently achieve higher AUC values, indicating better overall performance in distinguishing between COVID-19 death possibilities.</p>
                        </list-item>
                        <list-item>
                            <label>5.</label>
                            <p>The models trained on the normal data generally performed better in terms of accuracy and AUC compared to the models trained on the modified datasets (SMOTE, TOMEK-LINKS, SMOTETOMEK). The Random Forest, XGBoost, and LGBM models consistently showed good performance across the metrics in all datasets, indicating their robustness and effectiveness in classification tasks. The Decision Tree model had relatively lower performance, especially in terms of AUC, in all methods.</p>
                        </list-item>
                    </list>
                </p>
                <fig fig-type="figure" id="f10" orientation="portrait" position="float">
                    <label>Figure 10. </label>
                    <caption>
                        <title>Class label counts before and after applying the various data-sampling techniques for the death dataset.</title>
                    </caption>
                    <graphic id="gr10" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/167975/daec2e63-3066-4001-8088-f41ca694dec9_figure10.gif"/>
                </fig>
                <table-wrap id="T4" orientation="portrait" position="float">
                    <label>Table 4. </label>
                    <caption>
                        <title>Performance measures of different methods with death dataset.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">Method</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Model</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Testing Accuracy</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Training Accuracy</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Precision</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Recall</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">F1 scores</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">AUC</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="4" valign="top">Normal</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">RF</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.84261</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.95272</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.82632</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.84261</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.83277</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.83</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">DT</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.82917</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.95272</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.81553</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.82917</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.82151</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.66</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">XGB</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.85700</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.93064</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.83523</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.85700</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.84072</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.86</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">LGBM</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.85700</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.91312</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.83938</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.85700</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.84503</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.86</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="4" valign="top">SMOTE</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">RF</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.81861</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.92647</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.84585</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.81861</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.82961</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.83</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">DT</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.81285</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.92647</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.83576</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.81285</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.82256</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.70</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">XGB</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.79942</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.88992</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.84183</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.79942</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.81556</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.81</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">LGBM</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.80614</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.87468</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.85443</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.80614</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.82339</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.82</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="4" valign="top">TOMEK-LINKS</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">RF</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.84261</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.95240</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.82632</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.84261</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.83277</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.83</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">DT</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.83301</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.95240</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.82065</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.83301</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.82608</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.67</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">XGB</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.84932</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.92920</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.83149</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.84932</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.83791</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.85</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">LGBM</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.85508</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.92002</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.83641</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.85508</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.84236</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.86</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="4" valign="top">SMOTETOMEK</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">RF</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.80326</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.9198</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.84520</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.80326</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.81909</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.82</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">DT</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.78406</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.91983</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.82101</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.78406</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.79924</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.65</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">XGB</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.79846</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.88383</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.84462</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.79846</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.81563</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.81</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">LGBM</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.79846</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.86958</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.84880</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.79846</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.81665</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">0.82</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
                <p>ROC curves, shown in 
                    <xref ref-type="fig" rid="f11">Figure 11</xref>, have been used to further analyze the predictive capability of these models. The RF and DT models prove their effectiveness. Taking AUC into account, all developed models perform satisfactorily.</p>
                <fig fig-type="figure" id="f11" orientation="portrait" position="float">
                    <label>Figure 11. </label>
                    <caption>
                        <title>Scenario 2: ROC curve for death status.</title>
                    </caption>
                    <graphic id="gr11" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/167975/daec2e63-3066-4001-8088-f41ca694dec9_figure11.gif"/>
                </fig>
                <p>The importance of all the features in the COVID-19 vaccine adverse reactions dataset is calculated using the feature importance package from the Scikit-learn Python library. A visual representation of the calculated values for feature importance is displayed in 
                    <xref ref-type="fig" rid="f12">Figure 12</xref>. The features are arranged based on their respective importance scores.</p>
                <fig fig-type="figure" id="f12" orientation="portrait" position="float">
                    <label>Figure 12. </label>
                    <caption>
                        <title>Ranking of features based on the patients' medical history coefficient values.</title>
                    </caption>
                    <graphic id="gr12" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/167975/daec2e63-3066-4001-8088-f41ca694dec9_figure12.gif"/>
                </fig>
                <p>
                    <xref ref-type="fig" rid="f12">Figure 12</xref> shows that patients' age, gender, and use of other medicines were significant factors in the past medical history for all target variables. When examining the target variable of &#x201c;vaccine type,&#x201d; the analysis revealed a comprehensive set of critical attributes within the patient's medical history that strongly influence the selection of the administered vaccine. These attributes include previous vaccine history, allergic history, diabetes, arthritis, hypertension, and asthma. Furthermore, when investigating the target variable of death status, certain factors emerged as highly significant. These factors include heart disease, allergic history, dementia, hypertension, diabetes, kidney disease, and chronic obstructive pulmonary disease (COPD). These attributes have shown a noteworthy impact on the desired outcome, indicating their importance in predicting the death status of patients.</p>
                <p>The patient's age and gender provide essential demographic information that may impact the choice of vaccine, as certain vaccines have age or gender-specific recommendations. Additionally, considering the patient's current medication usage is crucial to ensure compatibility and potential interactions with the chosen vaccine. Previous vaccine history helps determine if the patient requires a booster or a specific type of vaccine.</p>
                <p>The presence of underlying conditions such as diabetes, arthritis, allergic history, hypertension, and asthma is highly influential in the decision-making process. These conditions may affect the patient's immune response or make them more susceptible to certain vaccine side effects. By considering these attributes, healthcare professionals can tailor the vaccine type to maximize efficacy and minimize risks for each patient.</p>
            </sec>
            <sec id="sec13">
                <title>Comparing Proposed Models with Related Works</title>
                <p>This section compares our proposed model with the results of prediction models that are available in the related works. The comparison was structured around the methodologies employed and the achieved levels of accuracy. 
                    <xref ref-type="table" rid="T5">Table 5</xref> presents the findings from four distinct studies on COVID-19 vaccine side effects. These studies utilized data from Twitter and the VAERS (COVID-19 World Vaccine Adverse Reactions dataset) spanning across different years. Each study employs a unique set of techniques to achieve specific objectives, resulting in varying degrees of accuracy.</p>
                <table-wrap id="T5" orientation="portrait" position="float">
                    <label>Table 5. </label>
                    <caption>
                        <title>Comparing results of studies.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">Authors</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Dataset</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Objective</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Model</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Accuracy</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Precision</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Recall</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">F1-score</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="4" valign="middle">M.Ma&#x2019;mon et al. [May 26, 2021]</td>
                                <td align="left" colspan="1" rowspan="4" valign="middle">Online survey</td>
                                <td align="left" colspan="1" rowspan="4" valign="middle">Predict the severity of side effects</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">XGB</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.79</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">-</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">-</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">-</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">RF</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.80</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">-</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">-</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">-</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">K*</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.44</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">-</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">-</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">-</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">MLP</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.70</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">-</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">-</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">-</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="5" valign="middle">Lian et al.
                                    <break/>[Jan 11, 2022]</td>
                                <td align="left" colspan="1" rowspan="5" valign="middle">Twitter</td>
                                <td align="left" colspan="1" rowspan="5" valign="middle">Identify personal experiences</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">SVM</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.89</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.89</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.94</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.91</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">LR</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.90</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.92</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.92</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.92</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">RF</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.90</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.90</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.94</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.92</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">Extra Trees</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.88</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.90</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.90</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.90</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">GB</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.89</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.92</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.90</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.91</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="4" valign="middle">Sujatha et al.
                                    <break/>[Oct 26, 2021]</td>
                                <td align="left" colspan="1" rowspan="4" valign="middle">VAERS</td>
                                <td align="left" colspan="1" rowspan="4" valign="middle">Predict suitability for vaccination</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">LR</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.97</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.88</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.90</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.89</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">RF</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.97</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.89</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.95</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.92</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">AdaBoost</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.98</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.89</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.97</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.93</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="middle">DT</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.97</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.91</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.87</td>
                                <td align="left" colspan="1" rowspan="1" valign="middle">0.89</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
            </sec>
            <sec id="sec14">
                <title>Strengths and limitations</title>
                <p>As far as the authors are aware, this is the first study that attempts to predict the type of COVID-19 vaccine appropriate for a candidate, along with the death probability risk. Additionally, we suggest approaches to address the issue of imbalanced data concerning adverse reactions to COVID-19 vaccines.</p>
                <p>This study has some limitations. Because these data were collected online, we cannot rule out information-gathering bias in the study. Moreover, this dataset contained a significant amount of missing data, which may lead to a misrepresentation of patient populations.</p>
            </sec>
        </sec>
        <sec id="sec15">
            <title>Conclusion and future works</title>
            <sec id="sec16">
                <title>Conclusion</title>
                <p>In this work, four ML models were evaluated: DT, RF, XGBoost, and LGBM. Three sampling techniques were executed for each model to handle imbalanced data. Below are some of the key findings of the study, which shed light on crucial insights and implications:
                    <list list-type="order">
                        <list-item>
                            <label>1.</label>
                            <p>The tree-based model RF presented the best overall results with multiclass classification.</p>
                        </list-item>
                        <list-item>
                            <label>2.</label>
                            <p>The SMOTE and SMOTETOMEK methods generally improve the performance of the models, as seen in higher AUC values compared to the Normal and TOMEK-LINKS methods.</p>
                        </list-item>
                        <list-item>
                            <label>3.</label>
                            <p>For binary classification in scenario 1, the experimental analysis recommends the RF model as the most suitable for detecting vaccine type compared to the other models.</p>
                        </list-item>
                        <list-item>
                            <label>4.</label>
                            <p>In scenario 2, the RF, XGBoost, and LGBM models consistently showed good performance across the metrics in all methods, indicating their robustness and effectiveness in classification tasks.</p>
                        </list-item>
                        <list-item>
                            <label>5.</label>
                            <p>The Decision Tree model had relatively lower performance, especially in terms of AUC, in all methods.</p>
                        </list-item>
                        <list-item>
                            <label>6.</label>
                            <p>The result revealed that patient age, gender, allergic history, prior vaccine, other medicines, diabetes, hypertension, and heart disease are significant pre-existing factors that strongly influence the selection of the administered vaccine.</p>
                        </list-item>
                    </list>
                </p>
                <p>According to the study's results, the RF model is recommended for machine learning tasks that demand high accuracy and robustness. While both the XGBoost and LGBM models are also viable options, the RF model could be preferable when dealing with imbalanced data. The effectiveness of these balancing algorithms has been evaluated, leading to the conclusion that no single technique can consistently produce the best results across all datasets. When considering the importance of data distribution, machine learning techniques and balancing algorithms are both crucial.</p>
            </sec>
            <sec id="sec17">
                <title>Future Works</title>
                <p>The findings of this study can be extrapolated to various other datasets related to vaccinations. While the inclusion of medical history features was restricted due to the substantial size of the dataset and the computational complexities associated with processing each disease, there is room for further advancement. By automating the system, its capability to analyze predictions based on a broader spectrum of medical history features can be enhanced. As new data streams into the dataset, fresh predictions can be dynamically generated by this automation, considering the prevailing factors at that specific moment. Additionally, the integration of deep learning methodologies presents an opportunity to uncover latent patterns within the data, thereby enhancing comprehension of the intricate dynamics governing COVID-19 vaccine acceptability. This multifaceted approach is poised not only to augment predictive accuracy but also to deepen the understanding of the nuanced interplay between medical history, vaccination patterns, and evolving epidemiological dynamics.</p>
            </sec>
        </sec>
    </body>
    <back>
        <sec id="sec18" sec-type="data-availability">
            <title>Data availability</title>
            <p>The dataset used to support the findings of this study is available at the following: 
                <ext-link ext-link-type="uri" xlink:href="https://vaers.hhs.gov/data/datasets.html">https://vaers.hhs.gov/data/datasets.html</ext-link>.</p>
            <p>The dataset comprises three CSV files, namely VAERSDATA, VAERSVAX, and VAERSSYMPTOMS. Within these datasets, VAERSDATA provides comprehensive information regarding individuals; VAERSVAX offers details related to vaccines, encompassing vaccination type, manufacturer, dosage count, and vaccination location; and VAERSSYMPTOMS catalogs symptoms reported as various illnesses following vaccinations.</p>
            <p>[VAERS Data]: 
                <ext-link ext-link-type="uri" xlink:href="https://vaers.hhs.gov/eSubDownload/index.jsp?fn=2021VAERSDATA.csv">https://vaers.hhs.gov/eSubDownload/index.jsp?fn=2021VAERSDATA.csv</ext-link>.</p>
            <p>[VAERS Vaccine]: 
                <ext-link ext-link-type="uri" xlink:href="https://vaers.hhs.gov/eSubDownload/index.jsp?fn=2021VAERSVAX.csv">https://vaers.hhs.gov/eSubDownload/index.jsp?fn=2021VAERSVAX.csv</ext-link>.</p>
            <p>[VAERS Symptoms]: 
                <ext-link ext-link-type="uri" xlink:href="https://vaers.hhs.gov/eSubDownload/index.jsp?fn=2021VAERSSYMPTOMS.csv">https://vaers.hhs.gov/eSubDownload/index.jsp?fn=2021VAERSSYMPTOMS.csv</ext-link>.
</p>
        </sec>
        <ref-list>
            <title>References</title>
            <ref id="ref1">
                <label>1</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Vel&#x00e1;squez</surname>
                            <given-names>G</given-names>
                        </name>
</person-group>:
                    <source>

                        <italic toggle="yes">Vaccines, Medicines and COVID-19: How Can WHO Be Given a Stronger Voice?</italic>
</source>
                    <publisher-name>Springer Nature</publisher-name>;<year>2022</year>;<fpage>117</fpage>.</mixed-citation>
            </ref>
            <ref id="ref2">
                <label>2</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Dai</surname>
                            <given-names>X</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Xiong</surname>
                            <given-names>Y</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Li</surname>
                            <given-names>N</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <chapter-title>Vaccine types.</chapter-title>
                    <source>

                        <italic toggle="yes">Vaccines-the History and Future.</italic>
</source>
                    <publisher-name>IntechOpen</publisher-name>;<year>2019</year>; (pp.<fpage>1</fpage>&#x2013;<lpage>18</lpage>).</mixed-citation>
            </ref>
            <ref id="ref3">
                <label>3</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Eroglu</surname>
                            <given-names>B</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Nuwarda</surname>
                            <given-names>RF</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ramzan</surname>
                            <given-names>I</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>A Narrative Review of COVID-19 Vaccines.</article-title>
                    <source>

                        <italic toggle="yes">Vaccines.</italic>
</source>
                    <year>2021</year>;<volume>10</volume>(<issue>1</issue>):<fpage>62</fpage>.
                    <pub-id pub-id-type="pmid">35062723</pub-id>
                    <pub-id pub-id-type="doi">10.3390/vaccines10010062</pub-id>
                    <pub-id pub-id-type="pmcid">PMC8779282</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref4">
                <label>4</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Monadhel</surname>
                            <given-names>H</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Abbas</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Mohammed</surname>
                            <given-names>A</given-names>
                        </name>
</person-group>:
                    <article-title>COVID-19 vaccinations and their side effects: a scoping systematic review [version 1; peer review: awaiting peer review].</article-title>
                    <source>

                        <italic toggle="yes">F1000Res.</italic>
</source>
                    <year>2023</year>;<volume>12</volume>:<fpage>604</fpage>.
                    <pub-id pub-id-type="doi">10.12688/f1000research.134171.1</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref5">
                <label>5</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Vitiello</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ferrara</surname>
                            <given-names>F</given-names>
                        </name>
</person-group>:
                    <article-title>Brief review of the mRNA vaccines COVID-19.</article-title>
                    <source>

                        <italic toggle="yes">Inflammopharmacology.</italic>
</source>
                    <year>2021</year>;<volume>29</volume>(<issue>3</issue>):<fpage>645</fpage>&#x2013;<lpage>649</lpage>.
                    <pub-id pub-id-type="pmid">33932192</pub-id>
                    <pub-id pub-id-type="doi">10.1007/s10787-021-00863-6</pub-id>
                    <pub-id pub-id-type="pmcid">PMC8087898</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref6">
                <label>6</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Patel</surname>
                            <given-names>R</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Kaki</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Potluri</surname>
                            <given-names>VS</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>A comprehensive review of SARS-CoV-2 vaccines: Pfizer, Moderna &amp; Johnson &amp; Johnson.</article-title>
                    <source>

                        <italic toggle="yes">Hum. Vaccin. Immunother.</italic>
</source>
                    <year>2022</year>;<volume>18</volume>(<issue>1</issue>):<fpage>2002083</fpage>.
                    <pub-id pub-id-type="pmid">35130825</pub-id>
                    <pub-id pub-id-type="doi">10.1080/21645515.2021.2002083</pub-id>
                    <pub-id pub-id-type="pmcid">PMC8862159</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref7">
                <label>7</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Al Khames Aga</surname>
                            <given-names>QA</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Alkhaffaf</surname>
                            <given-names>WH</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Hatem</surname>
                            <given-names>TH</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Safety of COVID-19 vaccines.</article-title>
                    <source>

                        <italic toggle="yes">J. Med. Virol.</italic>
</source>
                    <year>2021</year>;<volume>93</volume>(<issue>12</issue>):<fpage>6588</fpage>&#x2013;<lpage>6594</lpage>.
                    <pub-id pub-id-type="pmid">34270094</pub-id>
                    <pub-id pub-id-type="doi">10.1002/jmv.27304</pub-id>
                    <pub-id pub-id-type="pmcid">PMC8426829</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref8">
                <label>8</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Sujatha</surname>
                            <given-names>R</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Venkata Siva Krishna</surname>
                            <given-names>B</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Chatterjee</surname>
                            <given-names>JM</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Prediction of suitable candidates for COVID-19 vaccination.</article-title>
                    <source>

                        <italic toggle="yes">Intell. Autom. Soft Comput.</italic>
</source>
                    <year>2022</year>;<volume>32</volume>(<issue>1</issue>):<fpage>525</fpage>&#x2013;<lpage>541</lpage>.
                    <pub-id pub-id-type="doi">10.3233/JIFS-202714</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref9">
                <label>9</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Javaid</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Haleem</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Singh</surname>
                            <given-names>RP</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Significance of machine learning in healthcare: Features, pillars and applications.</article-title>
                    <source>

                        <italic toggle="yes">Int. J. Intell. Networks.</italic>
</source>
                    <year>2022</year>;<volume>3</volume>:<fpage>58</fpage>&#x2013;<lpage>73</lpage>.
                    <pub-id pub-id-type="doi">10.1016/j.ijin.2022.05.002</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref10">
                <label>10</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Keita</surname>
                            <given-names>Zoumana</given-names>
                        </name>
</person-group>:
                    <article-title>&#x201c;Classification in Machine Learning: An Introduction&#x201d;, DataCamp.</article-title>
                    <year>Sep 2022</year>.
                    <ext-link ext-link-type="uri" xlink:href="https://www.datacamp.com/blog/classification-machine-learning">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref11">
                <label>11</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Hatmal</surname>
                            <given-names>MMM</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Al-Hatamleh</surname>
                            <given-names>MA</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Olaimat</surname>
                            <given-names>AN</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Side effects and perceptions following COVID-19 vaccination in Jordan: a randomized, cross-sectional study implementing machine learning for predicting severity of side effects.</article-title>
                    <source>

                        <italic toggle="yes">Vaccines.</italic>
</source>
                    <year>2021</year>;<volume>9</volume>(<issue>6</issue>):<fpage>556</fpage>.
                    <pub-id pub-id-type="pmid">34073382</pub-id>
                    <pub-id pub-id-type="doi">10.3390/vaccines9060556</pub-id>
                    <pub-id pub-id-type="pmcid">PMC8229440</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref12">
                <label>12</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Lian</surname>
                            <given-names>AT</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Du</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Tang</surname>
                            <given-names>L</given-names>
                        </name>
</person-group>:
                    <article-title>Using a machine learning approach to monitor COVID-19 vaccine adverse events (VAE) from twitter data.</article-title>
                    <source>

                        <italic toggle="yes">Vaccines.</italic>
</source>
                    <year>2022</year>;<volume>10</volume>(<issue>1</issue>):<fpage>103</fpage>.
                    <pub-id pub-id-type="pmid">35062764</pub-id>
                    <pub-id pub-id-type="doi">10.3390/vaccines10010103</pub-id>
                    <pub-id pub-id-type="pmcid">PMC8781534</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref13">
                <label>13</label>
                <mixed-citation publication-type="other">
                    <collab>VAERS Data Sets</collab>:
                    <ext-link ext-link-type="uri" xlink:href="https://vaers.hhs.gov/data/datasets.html">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref14">
                <label>14</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Henry</surname>
                            <given-names>M</given-names>
                        </name>
</person-group>:
                    <article-title>Imbalanced Classification in Python: SMOTE-Tomek Links Method.</article-title>
                    <ext-link ext-link-type="uri" xlink:href="https://towardsdatascience.com/imbalanced-classification-in-python-smote-tomek-links-method-6e48dfe69bbc">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref15">
                <label>15</label>
                <mixed-citation publication-type="other">
                    <collab>Random Forest Algorithm</collab>:
                    <article-title>Java T point.</article-title>
                    <year>2018</year>.
                    <ext-link ext-link-type="uri" xlink:href="https://www.javatpoint.com/machine-learning-random-forest-algorithm">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref16">
                <label>16</label>
                <mixed-citation publication-type="other">
                    <article-title>Decision Tree Classification Algorithm GeeksforGeeks.</article-title>
                    <year>08 May, 2023</year>.
                    <ext-link ext-link-type="uri" xlink:href="https://www.geeksforgeeks.org/decision-tree/?ref=gcse">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref17">
                <label>17</label>
                <mixed-citation publication-type="other">&#x201c;XGBoost ML Model in Python&#x201d; JavaTpoint, [online].
                    <ext-link ext-link-type="uri" xlink:href="https://www.javatpoint.com/xgboost-ml-model-in-python">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref18">
                <label>18</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Banerjee</surname>
                            <given-names>P</given-names>
                        </name>
</person-group>:
                    <article-title>LightGBM Classifier in Python Kaggle.</article-title>
                    <year>2021</year>.
                    <ext-link ext-link-type="uri" xlink:href="https://www.kaggle.com/code/prashant111/lightgbm-classifier-in-python">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref19">
                <label>19</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Grandini</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Bagli</surname>
                            <given-names>E</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Visani</surname>
                            <given-names>G</given-names>
                        </name>
</person-group>:
                    <article-title>Metrics for multi-class classification: an overview.</article-title>
                    <source>

                        <italic toggle="yes">arXiv preprint arXiv:2008.05756.</italic>
</source>
                    <year>2020</year>.</mixed-citation>
            </ref>
            <ref id="ref20">
                <label>20</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Markoulidakis</surname>
                            <given-names>I</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Kopsiaftis</surname>
                            <given-names>G</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Rallis</surname>
                            <given-names>I</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Multi-Class Confusion Matrix Reduction method and its application on Net Promoter Score classification problem.</article-title>
                    <source>

                        <italic toggle="yes">In The 14th pervasive technologies related to assistive environments conference.</italic>
</source>
                    <year>2021, June</year>:<fpage>412</fpage>&#x2013;<lpage>419</lpage>.</mixed-citation>
            </ref>
            <ref id="ref21">
                <label>21</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Abbas</surname>
                            <given-names>AR</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Farooq</surname>
                            <given-names>AO</given-names>
                        </name>
</person-group>:
                    <article-title>Skin Detection Using Improved ID3 Algorithm.</article-title>
                    <source>

                        <italic toggle="yes">Iraqi J. Sci.</italic>
</source>
                    <year>2019</year>;<fpage>402</fpage>&#x2013;<lpage>410</lpage>.</mixed-citation>
            </ref>
            <ref id="ref22">
                <label>22</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Narkhede</surname>
                            <given-names>S</given-names>
                        </name>
</person-group>:
                    <article-title>Understanding AUC - ROC Curve Medium.</article-title>
                    <year>Jun 26, 2018</year>.
                    <ext-link ext-link-type="uri" xlink:href="https://towardsdatascience.com/understanding-auc-roc-curve-68b2303cc9c5">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref23">
                <label>23</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Abbas</surname>
                            <given-names>AR</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Kareem</surname>
                            <given-names>AR</given-names>
                        </name>
</person-group>:
                    <article-title>Age estimation using support vector machine.</article-title>
                    <source>

                        <italic toggle="yes">Iraqi J. Sci.</italic>
</source>
                    <year>2018</year>;<fpage>1746</fpage>&#x2013;<lpage>1756</lpage>.</mixed-citation>
            </ref>
            <ref id="ref24">
                <label>24</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Belete</surname>
                            <given-names>DM</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Huchaiah</surname>
                            <given-names>MD</given-names>
                        </name>
</person-group>:
                    <article-title>Grid search in hyperparameter optimization of machine learning models for prediction of HIV/AIDS test results.</article-title>
                    <source>

                        <italic toggle="yes">Int. J. Comput. Appl.</italic>
</source>
                    <year>2022</year>;<volume>44</volume>(<issue>9</issue>):<fpage>875</fpage>&#x2013;<lpage>886</lpage>.</mixed-citation>
            </ref>
        </ref-list>
    </back>
    <sub-article article-type="reviewer-report" id="report316661">
        <front-stub>
            <article-id pub-id-type="doi">10.5256/f1000research.167975.r316661</article-id>
            <title-group>
                <article-title>Reviewer response for version 2</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author">
                    <name>
                        <surname>Bandyopadhyay</surname>
                        <given-names>Samir Kumar</given-names>
                    </name>
                    <xref ref-type="aff" rid="r316661a1">1</xref>
                    <role>Referee</role>
                    <uri content-type="orcid">https://orcid.org/0000-0002-4868-3459</uri>
                </contrib>
                <aff id="r316661a1">
                    <label>1</label>The Bhawanipur Education Society College, Kolkata, India</aff>
            </contrib-group>
            <author-notes>
                <fn fn-type="conflict">
                    <p>
                        <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>5</day>
                <month>9</month>
                <year>2024</year>
            </pub-date>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2024 Bandyopadhyay SK</copyright-statement>
                <copyright-year>2024</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access peer review report distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <related-article ext-link-type="doi" id="relatedArticleReport316661" related-article-type="peer-reviewed-article" xlink:href="10.12688/f1000research.140395.2"/>
            <custom-meta-group>
                <custom-meta>
                    <meta-name>recommendation</meta-name>
                    <meta-value>approve</meta-value>
                </custom-meta>
            </custom-meta-group>
        </front-stub>
        <body>
            <p>After reviewing the revision made by the authors I am satisfied with the revision.</p>
            <p>Is the work clearly and accurately presented and does it cite the current literature?</p>
            <p>No</p>
            <p>If applicable, is the statistical analysis and its interpretation appropriate?</p>
            <p>Yes</p>
            <p>Are all the source data underlying the results available to ensure full reproducibility?</p>
            <p>Yes</p>
            <p>Is the study design appropriate and is the work technically sound?</p>
            <p>Yes</p>
            <p>Are the conclusions drawn adequately supported by the results?</p>
            <p>No</p>
            <p>Are sufficient details of methods and analysis provided to allow replication by others?</p>
            <p>No</p>
            <p>Reviewer Expertise:</p>
            <p>Image Processing, Forensic Science, Machine Learning, Crime Science, Data Science, etc.</p>
            <p>I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard.</p>
        </body>
    </sub-article>
    <sub-article article-type="reviewer-report" id="report233060">
        <front-stub>
            <article-id pub-id-type="doi">10.5256/f1000research.153740.r233060</article-id>
            <title-group>
                <article-title>Reviewer response for version 1</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author">
                    <name>
                        <surname>Bandyopadhyay</surname>
                        <given-names>Samir Kumar</given-names>
                    </name>
                    <xref ref-type="aff" rid="r233060a1">1</xref>
                    <role>Referee</role>
                    <uri content-type="orcid">https://orcid.org/0000-0002-4868-3459</uri>
                </contrib>
                <aff id="r233060a1">
                    <label>1</label>The Bhawanipur Education Society College, Kolkata, India</aff>
            </contrib-group>
            <author-notes>
                <fn fn-type="conflict">
                    <p>
                        <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>31</day>
                <month>5</month>
                <year>2024</year>
            </pub-date>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2024 Bandyopadhyay SK</copyright-statement>
                <copyright-year>2024</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access peer review report distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <related-article ext-link-type="doi" id="relatedArticleReport233060" related-article-type="peer-reviewed-article" xlink:href="10.12688/f1000research.140395.1"/>
            <custom-meta-group>
                <custom-meta>
                    <meta-name>recommendation</meta-name>
                    <meta-value>reject</meta-value>
                </custom-meta>
            </custom-meta-group>
        </front-stub>
        <body>
            <p>Authors:</p>
            <p> Q1. Specify the imbalanced dataset with an example.</p>
            <p> Q2. What type of illnesses can be seen at the time of vaccination?</p>
            <p> Q3. Explain EDA since otherwise it is difficult to understand the pre-processing steps.</p>
            <p> Q4. Pre-existing chronic disease is required to study and it must contain fever nature.</p>
            <p> Q5.The process of feature extraction is not provided accurately.</p>
            <p> Q6. What is the difference between attribute values and attribute text?</p>
            <p> Q7. There are no comparisons with existing methods.</p>
            <p> Editor:</p>
            <p> Authors studied to predict which vaccine will be most effective for a candidate without causing severe adverse reactions and to categorize a patient as potentially at high risk of death from the COVID-19 vaccine. In the Feature extraction step, most of the important features in the acquired dataset are presented as textual data. However, in order to analyze them, they must be separated into separate entities. All important parts are not explained properly.</p>
            <p>Is the work clearly and accurately presented and does it cite the current literature?</p>
            <p>No</p>
            <p>If applicable, is the statistical analysis and its interpretation appropriate?</p>
            <p>Yes</p>
            <p>Are all the source data underlying the results available to ensure full reproducibility?</p>
            <p>Yes</p>
            <p>Is the study design appropriate and is the work technically sound?</p>
            <p>Yes</p>
            <p>Are the conclusions drawn adequately supported by the results?</p>
            <p>No</p>
            <p>Are sufficient details of methods and analysis provided to allow replication by others?</p>
            <p>No</p>
            <p>Reviewer Expertise:</p>
            <p>Image Processing, Forensic Science, Machine Learning, Crime Science, Data Science, etc.</p>
            <p>I confirm that I have read this submission and believe that I have an appropriate level of expertise to state that I do not consider it to be of an acceptable scientific standard, for reasons outlined above.</p>
        </body>
        <sub-article article-type="response" id="comment11783-233060">
            <front-stub>
                <contrib-group>
                    <contrib contrib-type="author">
                        <name>
                            <surname>Monadhel</surname>
                            <given-names>Hind</given-names>
                        </name>
                        <aff>Computer Science, University of Technology Baghdad, Baghdad, Baghdad, Iraq</aff>
                    </contrib>
                </contrib-group>
                <author-notes>
                    <fn fn-type="conflict">
                        <p>
                            <bold>Competing interests: </bold>None</p>
                    </fn>
                </author-notes>
                <pub-date pub-type="epub">
                    <day>14</day>
                    <month>6</month>
                    <year>2024</year>
                </pub-date>
            </front-stub>
            <body>
                <p>Dear Samir,</p>
                <p> </p>
                <p> Thank you for dedicating your time and effort to review our manuscript. Your insightful comments and suggestions have provided valuable guidance for improving our work.</p>
                <p> </p>
                <p> While we are disappointed by the decision, we genuinely appreciate the constructive feedback that will undoubtedly strengthen our future research efforts.</p>
                <p> </p>
                <p> Thank you once again for your thorough review.</p>
            </body>
        </sub-article>
    </sub-article>
    <sub-article article-type="reviewer-report" id="report245449">
        <front-stub>
            <article-id pub-id-type="doi">10.5256/f1000research.153740.r245449</article-id>
            <title-group>
                <article-title>Reviewer response for version 1</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author">
                    <name>
                        <surname>Wu</surname>
                        <given-names>Jinran (Ryan)</given-names>
                    </name>
                    <xref ref-type="aff" rid="r245449a1">1</xref>
                    <role>Referee</role>
                    <uri content-type="orcid">https://orcid.org/0000-0002-2388-3614</uri>
                </contrib>
                <aff id="r245449a1">
                    <label>1</label>Australian Catholic University, Fitzroy, Victoria, Australia</aff>
            </contrib-group>
            <author-notes>
                <fn fn-type="conflict">
                    <p>
                        <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>3</day>
                <month>5</month>
                <year>2024</year>
            </pub-date>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2024 Wu J</copyright-statement>
                <copyright-year>2024</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access peer review report distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <related-article ext-link-type="doi" id="relatedArticleReport245449" related-article-type="peer-reviewed-article" xlink:href="10.12688/f1000research.140395.1"/>
            <custom-meta-group>
                <custom-meta>
                    <meta-name>recommendation</meta-name>
                    <meta-value>approve-with-reservations</meta-value>
                </custom-meta>
            </custom-meta-group>
        </front-stub>
        <body>
            <p>The authors proposed predicting vaccine types and assessing mortality risk through some ensemble learning approaches. The research topic is interesting, and some comments are given for further consideration.</p>
            <p> 1. Based on Google Scholar, authors missed citing many recent references that shouldn't be ignored. Here, I suggest using a table to list and compare their main points to highlight your contributions to the area.</p>
            <p> 2. For the experiment part, all parameter settings are missing, so that the results declared cannot be repeated. Please release your codes or detailed settings in the appendix.</p>
            <p> 3. For your results, I suggest authors use a cross-validation approach to evaluate the uncertainty of the predictions.&#x00a0;</p>
            <p> 4. Also, considering the imbalance issue, the authors shall consider different penalties for different prediction errors. In other words, we cannot regard "dies" as the same as "non-dies". Authors shall distinguish different losses.</p>
            <p> 5. The discussion could have been improved. The authors shall further explore the underlying implications in this part. Otherwise, the work looks like a mathematical game. In particular, the authors shall connect results to some findings from some top medicine journals.</p>
            <p> 6. The authors shall use a professional writing service to make the content clear.</p>
            <p>Is the work clearly and accurately presented and does it cite the current literature?</p>
            <p>Partly</p>
            <p>If applicable, is the statistical analysis and its interpretation appropriate?</p>
            <p>Partly</p>
            <p>Are all the source data underlying the results available to ensure full reproducibility?</p>
            <p>Partly</p>
            <p>Is the study design appropriate and is the work technically sound?</p>
            <p>Partly</p>
            <p>Are the conclusions drawn adequately supported by the results?</p>
            <p>Partly</p>
            <p>Are sufficient details of methods and analysis provided to allow replication by others?</p>
            <p>Partly</p>
            <p>Reviewer Expertise:</p>
            <p>Machine learning, Forecasting, Applied Statistics</p>
            <p>I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard, however I have significant reservations, as outlined above.</p>
        </body>
        <sub-article article-type="response" id="comment11782-245449">
            <front-stub>
                <contrib-group>
                    <contrib contrib-type="author">
                        <name>
                            <surname>Monadhel</surname>
                            <given-names>Hind</given-names>
                        </name>
                        <aff>Computer Science, University of Technology Baghdad, Baghdad, Baghdad, Iraq</aff>
                    </contrib>
                </contrib-group>
                <author-notes>
                    <fn fn-type="conflict">
                        <p>
                            <bold>Competing interests: </bold>None</p>
                    </fn>
                </author-notes>
                <pub-date pub-type="epub">
                    <day>14</day>
                    <month>6</month>
                    <year>2024</year>
                </pub-date>
            </front-stub>
            <body>
                <p>Dear Jinran Wu,</p>
                <p> </p>
                <p> Thank you for your valuable feedback and suggestions. We have carefully addressed all your comments and revised the manuscript accordingly. Specifically, regarding your suggestion " 3. For your results, I suggest authors use a cross-validation approach to evaluate the uncertainty of the predictions," we will upload the experimental code to ensure clarity.</p>
                <p> </p>
                <p> We appreciate your thorough review and believe these revisions have significantly enhanced the quality of our work.</p>
            </body>
        </sub-article>
    </sub-article>
    <sub-article article-type="reviewer-report" id="report257042">
        <front-stub>
            <article-id pub-id-type="doi">10.5256/f1000research.153740.r257042</article-id>
            <title-group>
                <article-title>Reviewer response for version 1</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author">
                    <name>
                        <surname>Ghosh</surname>
                        <given-names>Aritra</given-names>
                    </name>
                    <xref ref-type="aff" rid="r257042a1">1</xref>
                    <role>Referee</role>
                    <uri content-type="orcid">https://orcid.org/0000-0002-9204-0434</uri>
                </contrib>
                <aff id="r257042a1">
                    <label>1</label>Computer Science, Sister Nivedita University, Kolkata, West Bengal, India</aff>
            </contrib-group>
            <author-notes>
                <fn fn-type="conflict">
                    <p>
                        <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>28</day>
                <month>3</month>
                <year>2024</year>
            </pub-date>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2024 Ghosh A</copyright-statement>
                <copyright-year>2024</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access peer review report distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <related-article ext-link-type="doi" id="relatedArticleReport257042" related-article-type="peer-reviewed-article" xlink:href="10.12688/f1000research.140395.1"/>
            <custom-meta-group>
                <custom-meta>
                    <meta-name>recommendation</meta-name>
                    <meta-value>approve</meta-value>
                </custom-meta>
            </custom-meta-group>
        </front-stub>
        <body>
            <p>
                <bold>Full Report:</bold>
            </p>
            <p> 
                <bold>Introduction:</bold> The introduction provides a comprehensive overview of the transition in vaccine development timelines and the urgent need for effective COVID-19 vaccines. It highlights the significance of understanding vaccine mechanisms and adverse reactions, setting the stage for the proposed machine-learning framework. However, it could benefit from a succinct statement of the study's objectives to guide readers through the subsequent sections more effectively.</p>
            <p> 
                <bold>Literature Review:</bold> The literature review effectively contextualizes the study within existing research on machine learning applications in healthcare, specifically focusing on COVID-19 vaccine prediction. It provides insights into relevant studies while emphasizing the novelty and contributions of the current work. However, the review could be strengthened by discussing potential limitations or gaps in previous research, thereby justifying the need for the proposed study more explicitly.</p>
            <p> 
                <bold>Methods:</bold> The methods section is detailed and well-structured, outlining the data preprocessing, feature extraction, and modeling techniques employed. It effectively communicates the rationale behind each step and provides clarity on the experimental design. The inclusion of figures and tables enhances the understanding of complex methodologies. However, providing more information on the rationale behind the selection of specific sampling techniques and model evaluation metrics would strengthen the methodology further.</p>
            <p> 
                <bold>Results and Discussion:</bold> The results and discussion section presents comprehensive findings from the study, including performance metrics and an analysis of key features. The results are effectively communicated through tables, figures, and textual descriptions, facilitating interpretation. The discussion contextualizes the findings within the broader literature and highlights implications for vaccine selection and adverse reaction prediction. However, a more structured approach to discussing limitations and future research directions would enhance the clarity of the discussion.</p>
            <p> 
                <bold>Conclusion and Future Works:</bold> The conclusion summarizes the key findings and implications of the study while outlining potential avenues for future research. It effectively emphasizes the significance of the study's contributions and underscores the importance of continued research in this area. However, providing more specific recommendations for addressing identified limitations and mitigating potential biases would enhance the conclusion's comprehensiveness.</p>
            <p> 
                <bold>Overall Assessment:</bold> The abstract and full report provide a detailed and insightful analysis of the proposed machine-learning framework for COVID-19 vaccine prediction. The study demonstrates a rigorous approach to data analysis and model evaluation, yielding valuable insights into vaccine efficacy and adverse reactions. Addressing the following minor issues would further enhance the scientific soundness and readability of the report:</p>
            <p> &#x00a0; 
                <list list-type="order">
                    <list-item>
                        <p>
                            <bold>Clarity and Readability:</bold> 
                            <list list-type="bullet">
                                <list-item>
                                    <p>Ensure consistent terminology throughout the report. For example, use either "COVID-19 vaccine" or "SARS-CoV-2 vaccine" consistently instead of switching between them.</p>
                                </list-item>
                                <list-item>
                                    <p>Consider breaking down lengthy paragraphs into shorter ones for easier readability and comprehension, especially in sections like "Results and Discussion" and "Conclusion and Future Works."</p>
                                </list-item>
                                <list-item>
                                    <p>Provide clear transitions between sections to guide the reader through the report more effectively. Each section should flow logically from one to the next.</p>
                                </list-item>
                            </list> </p>
                    </list-item>
                    <list-item>
                        <p>
                            <bold>Justification:</bold> 
                            <list list-type="bullet">
                                <list-item>
                                    <p>Provide more justification for the choice of machine learning algorithms. Explain why Random Forest (RF), Decision Tree (DT), Extreme Gradient Boosting (XGB), and Light Gradient Boosting Machine (LGBM) were selected over other algorithms. Justify why these algorithms are suitable for the task at hand.</p>
                                </list-item>
                                <list-item>
                                    <p>Clarify the reasoning behind choosing specific data sampling techniques (e.g., SMOTE, Tomek-links, SMOTETOMEK) to handle imbalanced data. Explain why these techniques were deemed appropriate and how they contribute to improving model performance.</p>
                                </list-item>
                            </list> </p>
                    </list-item>
                    <list-item>
                        <p>
                            <bold>Structure:</bold> 
                            <list list-type="bullet">
                                <list-item>
                                    <p>Consider refining the structure of the report to make it more cohesive and organized. For instance, ensure that each section has a clear and specific focus, with subheadings to delineate different topics within the section.</p>
                                </list-item>
                                <list-item>
                                    <p>Provide a brief overview or summary at the beginning of each section to outline the main points that will be discussed. This will help readers understand the purpose and scope of each section more clearly.</p>
                                </list-item>
                                <list-item>
                                    <p>In the "Conclusion and Future Works" section, provide a concise summary of the key findings and implications of the study. Additionally, offer specific suggestions for future research directions based on the limitations or areas for improvement identified in the study.</p>
                                </list-item>
                            </list> </p>
                    </list-item>
                </list> By addressing these minor issues, the report will become more scientifically sound and easier to follow for readers, thereby enhancing its overall quality and impact.</p>
            <p>Is the work clearly and accurately presented and does it cite the current literature?</p>
            <p>Yes</p>
            <p>If applicable, is the statistical analysis and its interpretation appropriate?</p>
            <p>Yes</p>
            <p>Are all the source data underlying the results available to ensure full reproducibility?</p>
            <p>Yes</p>
            <p>Is the study design appropriate and is the work technically sound?</p>
            <p>Yes</p>
            <p>Are the conclusions drawn adequately supported by the results?</p>
            <p>Yes</p>
            <p>Are sufficient details of methods and analysis provided to allow replication by others?</p>
            <p>Yes</p>
            <p>Reviewer Expertise:</p>
            <p>Data Analysis, AI and ML, HCI, Computational Modeling and Big Data, and Web Development. Currently working on the application of Machine Learning for COVID-19 vaccine development.</p>
            <p>I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard.</p>
        </body>
        <sub-article article-type="response" id="comment11781-257042">
            <front-stub>
                <contrib-group>
                    <contrib contrib-type="author">
                        <name>
                            <surname>Monadhel</surname>
                            <given-names>Hind</given-names>
                        </name>
                        <aff>Computer Science, University of Technology Baghdad, Baghdad, Baghdad, Iraq</aff>
                    </contrib>
                </contrib-group>
                <author-notes>
                    <fn fn-type="conflict">
                        <p>
                            <bold>Competing interests: </bold>None</p>
                    </fn>
                </author-notes>
                <pub-date pub-type="epub">
                    <day>14</day>
                    <month>6</month>
                    <year>2024</year>
                </pub-date>
            </front-stub>
            <body>
                <p>Dear Aritra Ghosh,</p>
                <p> </p>
                <p> Thank you for your valuable feedback and suggestions. We have carefully addressed all your comments and have revised the manuscript accordingly.</p>
                <p> </p>
                <p> We appreciate your thorough review and believe the changes have significantly improved the quality of our work.</p>
                <p> </p>
                <p> </p>
                <p> Hind M.Abdullah</p>
            </body>
        </sub-article>
    </sub-article>
</article>
