<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.2 20190208//EN" "http://jats.nlm.nih.gov/publishing/1.2/JATS-journalpublishing1.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="1.2" xml:lang="en">
    <front>
        <journal-meta>
            <journal-id journal-id-type="pmc">F1000Research</journal-id>
            <journal-title-group>
                <journal-title>F1000Research</journal-title>
            </journal-title-group>
            <issn pub-type="epub">2046-1402</issn>
            <publisher>
                <publisher-name>F1000 Research Limited</publisher-name>
                <publisher-loc>London, UK</publisher-loc>
            </publisher>
        </journal-meta>
        <article-meta>
            <article-id pub-id-type="doi">10.12688/f1000research.133594.1</article-id>
            <article-categories>
                <subj-group subj-group-type="heading">
                    <subject>Research Article</subject>
                </subj-group>
                <subj-group>
                    <subject>Articles</subject>
                </subj-group>
            </article-categories>
            <title-group>
                <article-title>Accident severity prediction modeling for road safety using random forest algorithm: an analysis of Indian highways</article-title>
                <fn-group content-type="pub-status">
                    <fn>
                        <p>[version 1; peer review: 2 approved with reservations]</p>
                    </fn>
                </fn-group>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author" corresp="yes">
                    <name>
                        <surname>Khanum</surname>
                        <given-names>Humera</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Conceptualization</role>
                    <role content-type="http://credit.niso.org/">Data Curation</role>
                    <role content-type="http://credit.niso.org/">Formal Analysis</role>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <role content-type="http://credit.niso.org/">Software</role>
                    <role content-type="http://credit.niso.org/">Validation</role>
                    <role content-type="http://credit.niso.org/">Visualization</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Original Draft Preparation</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <uri content-type="orcid">https://orcid.org/0000-0003-2689-6370</uri>
                    <xref ref-type="corresp" rid="c1">a</xref>
                    <xref ref-type="aff" rid="a1">1</xref>
                    <xref ref-type="aff" rid="a2">2</xref>
                </contrib>
                <contrib contrib-type="author" corresp="yes">
                    <name>
                        <surname>Garg</surname>
                        <given-names>Anshul</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Conceptualization</role>
                    <role content-type="http://credit.niso.org/">Supervision</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="corresp" rid="c2">b</xref>
                    <xref ref-type="aff" rid="a2">2</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Faheem</surname>
                        <given-names>Mir Iqbal</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Conceptualization</role>
                    <role content-type="http://credit.niso.org/">Supervision</role>
                    <role content-type="http://credit.niso.org/">Validation</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a3">3</xref>
                </contrib>
                <aff id="a1">
                    <label>1</label>Civil Engineering Department, Symbiosis Institute of Technology, Pune Campus, Symbiosis International (Deemed University), Pune, Maharashtra, 412115, India</aff>
                <aff id="a2">
                    <label>2</label>School of Civil Engineering, Lovely Professional University, Phagwara, Punjab, 144411, India</aff>
                <aff id="a3">
                    <label>3</label>Civil Engineering Department, Deccan College of Engineering and Technology, Hyderabad, Telangana, 500001, India</aff>
            </contrib-group>
            <author-notes>
                <corresp id="c1">
                    <label>a</label>
                    <email xlink:href="mailto:humera.khanum@sitpune.edu.in">humera.khanum@sitpune.edu.in</email>
                </corresp>
                <corresp id="c2">
                    <label>b</label>
                    <email xlink:href="mailto:anshul.18374@lpu.co.in">anshul.18374@lpu.co.in</email>
                </corresp>
                <fn fn-type="conflict">
                    <p>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>15</day>
                <month>5</month>
                <year>2023</year>
            </pub-date>
            <pub-date pub-type="collection">
                <year>2023</year>
            </pub-date>
            <volume>12</volume>
            <elocation-id>494</elocation-id>
            <history>
                <date date-type="accepted">
                    <day>5</day>
                    <month>5</month>
                    <year>2023</year>
                </date>
            </history>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2023 Khanum H et al.</copyright-statement>
                <copyright-year>2023</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access article distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <self-uri content-type="pdf" xlink:href="https://f1000research.com/articles/12-494/pdf"/>
            <abstract>
                <p>
                    <bold>Background:</bold> Road accidents claim around 1.35 million lives annually, with countries like India facing a significant impact. In 2019, India reported 449,002 road accidents, causing 151,113 deaths and 451,361 injuries. Accident severity modeling helps understand contributing factors and develop preventive strategies. AI models, such as random forest, offer adaptability and higher predictive accuracy compared to traditional statistical models. This study aims to develop a predictive model for traffic accident severity on Indian highways using the random forest algorithm.</p>
                <p>
                    <bold>Methods:</bold> A multi-step methodology was employed, involving data collection and preparation, feature selection, training a random forest model, tuning parameters, and evaluating the model using accuracy and F1 score. Data sources included MoRTH and NHAI.</p>
                <p>
                    <bold>Results:</bold> The classification model had hyperparameters &#x2018;max depth&#x2019;: 10, &#x2018;max features&#x2019;: &#x2018;sqrt&#x2019;, and &#x2018;n estimators&#x2019;: 100. The model achieved an overall accuracy of 67% and a weighted average F1-score of 0.64 on the training set, with a macro average F1-score of 0.53. Using grid search, a random forest classifier was fitted with optimal parameters, resulting in 41.47% accuracy on test data.</p>
                <p>
                    <bold>Conclusions:</bold> The random forest classifier model predicted traffic accident severity with 67% accuracy on the training set and 41.47% on the test set, suggesting possible bias or imbalance in the dataset. No clear patterns were found between the day of the week and accident occurrence or severity. Performance can be improved by addressing dataset imbalance and refining model hyperparameters. The model often underestimated accident severity, highlighting the influence of external factors. Adopting a sophisticated data recording system in line with MoRTH and IRC guidelines and integrating machine learning techniques can enhance road safety modeling, decision-making, and accident prevention efforts.</p>
            </abstract>
            <kwd-group kwd-group-type="author">
                <kwd>Traffic Accidents</kwd>
                <kwd>Accident Severity</kwd>
                <kwd>Road Safety</kwd>
                <kwd>Accident Prediction Modeling</kwd>
                <kwd>Random Forest</kwd>
            </kwd-group>
            <funding-group>
                <funding-statement>The author(s) declared that no grants were involved in supporting this work.</funding-statement>
            </funding-group>
        </article-meta>
    </front>
    <body>
        <sec id="sec1" sec-type="intro">
            <title>Introduction</title>
            <p>Road accidents are a significant public health concern worldwide, with an estimated 1.35 million deaths caused by road traffic accidents each year.
                <sup>
                    <xref ref-type="bibr" rid="ref1">1</xref>
                </sup> Developing countries, such as India, are disproportionately affected, with over 150,000 fatalities reported annually.
                <sup>
                    <xref ref-type="bibr" rid="ref1">1</xref>
                </sup> Road safety is a major concern in India, with a large number of accidents and fatalities reported each year. According to the Ministry of Road Transport and Highways, there were 449,002 road accidents in India in 2019, resulting in 151,113 deaths and 451,361 injuries.
                <sup>
                    <xref ref-type="bibr" rid="ref2">2</xref>
                </sup> To understand the factors that contribute to accidents and to develop strategies to prevent them, accident severity modelling is a statistical technique used in the field of road safety.</p>
            <p>The modelling process involves analyzing data on past accidents and identifying the factors that contributed to their occurrence and severity. These factors can include road conditions, weather, driver behaviour, and vehicle type, among others. The goal of accident severity modelling is to identify the factors that are most important in contributing to accidents and to develop evidence-based strategies to improve road safety and reduce the number and severity of accidents.
                <sup>
                    <xref ref-type="bibr" rid="ref3">3</xref>
                </sup>
                <sup>&#x2013;</sup>
                <sup>
                    <xref ref-type="bibr" rid="ref6">6</xref>
                </sup>
            </p>
            <p>Statistical models, such as logit models and probit models,
                <sup>
                    <xref ref-type="bibr" rid="ref7">7</xref>
                </sup> have been widely used for predicting traffic accidents&#x2019; severity since the early 1990s. However, if assumptions imposed on these models are violated, results may be inaccurate. Artificial intelligence models, on the other hand, do not make any assumptions and are more adaptable. They are capable of handling intricate nonlinear relationships and generally offer higher predictive accuracy than statistical approaches. Random forest (RF) is a powerful and versatile algorithm for accident severity prediction, with several advantages over other machine learning algorithms. It has been successfully applied in various contexts, and its performance can be further improved by tuning the key parameters and carefully pre-processing the data.
                <sup>
                    <xref ref-type="bibr" rid="ref8">8</xref>
                </sup>
                <sup>&#x2013;</sup>
                <sup>
                    <xref ref-type="bibr" rid="ref11">11</xref>
                </sup>
            </p>
            <p>The performance of the random forest algorithm is significantly influenced by the selection of hyperparameters.
                <sup>
                    <xref ref-type="bibr" rid="ref12">12</xref>
                </sup>
            </p>
            <p>To optimize its performance, identifying the optimal parameter values is crucial. Previous research has predominantly utilized grid search to explore values within the parameter space. However, alternative approaches may be necessary to overcome the computational challenge of grid search in high-dimensional parameter spaces.
                <sup>
                    <xref ref-type="bibr" rid="ref13">13</xref>
                </sup> Random search and Bayesian optimization are effective alternatives to grid search for hyperparameter optimization in random forest, particularly in high-dimensional parameter spaces.
                <sup>
                    <xref ref-type="bibr" rid="ref13">13</xref>
                </sup>
            </p>
            <p>Accurate prediction of traffic accident severity is essential for improving emergency response, reducing fatalities, and minimizing injuries. To achieve this goal, accurate data, appropriate machine learning algorithms, and regular updates to the predictive models are necessary. Given the advantages of random forest models in terms of prediction accuracy and interpretability, they can be used as the primary predictive model for traffic accident severity on Indian highways. Several factors contribute to accidents on Indian highways, including poorly designed or maintained roads, speeding, and roadside hazards. Identifying these factors through accident severity modelling can help develop evidence-based strategies to improve road safety and reduce the number and severity of accidents.
                <sup>
                    <xref ref-type="bibr" rid="ref14">14</xref>
                </sup>
                <sup>&#x2013;</sup>
                <sup>
                    <xref ref-type="bibr" rid="ref16">16</xref>
                </sup>
            </p>
            <p>Indian highways have a high incidence of accidents, and several contributing factors have been identified. These include road design and geometry, speed, roadside hazards, and driver behavior. Poorly designed or maintained roads, such as those with narrow or winding stretches, lack of markings, and poor road surfaces, increase the likelihood of accidents.
                <sup>
                    <xref ref-type="bibr" rid="ref17">17</xref>
                </sup> Speeding is a major factor in many accidents on Indian highways, which may result from a lack of enforcement, cultural norms, and driver attitudes.
                <sup>
                    <xref ref-type="bibr" rid="ref18">18</xref>
                </sup> Roadside hazards, such as trees, poles, and animals, are prevalent on Indian highways and increase the risk of collision.
                <sup>
                    <xref ref-type="bibr" rid="ref19">19</xref>
                </sup> Driver behavior, including drunk, distracted, and reckless driving, is also a significant contributor to accidents on Indian highways.
                <sup>
                    <xref ref-type="bibr" rid="ref15">15</xref>
                </sup> Addressing these factors is crucial to improving road safety and reducing accidents on Indian highways.</p>
            <p>The main objective of our study is to develop a predictive model for the severity of traffic accidents on Indian highways. To achieve this goal, we have chosen random forest models due to their ability to provide accurate predictions and interpretability.</p>
            <p>The findings of our study will be used to develop a predictive model for accident severity that can inform road safety policies and interventions. This model can be used to identify high-risk areas and to prioritize resources for accident prevention and mitigation.</p>
            <sec id="sec2">
                <title>Study Areas</title>
                <p>The study areas selected are the two National Highway stretches listed below:
                    <list list-type="order">
                        <list-item>
                            <label>1.</label>
                            <p>Pune-Sholapur Section of NH-9 in km.144/400 to Km. 249/000 in the State of Maharashtra (
                                <xref ref-type="fig" rid="f1">Figure 1</xref>).</p>
                        </list-item>
                        <list-item>
                            <label>2.</label>
                            <p>Six-Laning of Barwa-Adda-Panagarh Section of NH-2 from km 398.240 to km 521.120 including Panagarh Bypass in the States of Jharkhand and West Bengal (
                                <xref ref-type="fig" rid="f2">Figure 2</xref>).</p>
                        </list-item>
                    </list>
                </p>
                <fig fig-type="figure" id="f1" orientation="portrait" position="float">
                    <label>Figure 1. </label>
                    <caption>
                        <title>Pune-Sholapur Section of NH-9 in km.144/400 to Km. 249/000 in the State of Maharashtra.</title>
                    </caption>
                    <graphic id="gr1" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/146596/53f6d316-1660-4fb2-ac64-0d5b032416c2_figure1.gif"/>
                </fig>
                <fig fig-type="figure" id="f2" orientation="portrait" position="float">
                    <label>Figure 2. </label>
                    <caption>
                        <title>Six-Laning of Barwa-Adda-Panagarh Section of NH-2 from km 398.240 to km 521.120 including Panagarh Bypass in the States of Jharkhand and West Bengal.</title>
                    </caption>
                    <graphic id="gr2" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/146596/53f6d316-1660-4fb2-ac64-0d5b032416c2_figure2.gif"/>
                </fig>
                <p>The study areas for this research project were selected based on specific criteria. Firstly, the researchers had prior experience of working on one of the stretches, which is the Pune-Sholapur Section of NH-9 in km.144/400 to Km. 249/000 in the State of Maharashtra. This experience could have provided insights and knowledge that could be useful in conducting the study.</p>
                <p>Additionally, data for a second stretch was provided on request by the same concessionaire as for the previous stretch: the Six-Laning of Barwa-Adda-Panagarh Section of NH-2 from km 398.240 to km 521.120 including Panagarh Bypass in the States of Jharkhand and West Bengal. This data could have been relevant to the research objectives and could have assisted in achieving the desired outcomes.</p>
            </sec>
        </sec>
        <sec id="sec3" sec-type="methods">
            <title>Methods</title>
            <p>The proposed methodology for this research involves the following steps for implementing a random forest machine learning model for accident severity prediction.</p>
            <p>Data Preparation: The first step in implementing a random forest model for accident severity prediction is to collect and prepare data. Raw data of road accidents for the selected stretches of the highway can be obtained from secondary sources such as the Ministry of Road Transport and Highways (MoRTH) and National Highways Authority of India (NHAI).
                <sup>
                    <xref ref-type="bibr" rid="ref2">2</xref>
                </sup> Data wrangling and mining techniques can be used to clean and preprocess the data.</p>
            <p>Feature Selection: Once the data is prepared, selecting appropriate features for the model becomes crucial. Feature selection plays a vital role in reducing the dimensionality of the data and enhancing the model&#x2019;s accuracy. There are several techniques available for feature selection, such as statistical tests, correlation analysis, and principal component analysis (PCA).
                <sup>
                    <xref ref-type="bibr" rid="ref20">20</xref>
                </sup>
            </p>
            <p>Model Training: In the next step, a random forest model can be trained on the preprocessed data. The model can be developed using a machine learning based framework, as described in Breiman&#x2019;s work on random forest.
                <sup>
                    <xref ref-type="bibr" rid="ref21">21</xref>
                </sup> The RF algorithm involves bagging and random feature selection techniques to create multiple decision trees that are aggregated to form a stronger learner.
                <sup>
                    <xref ref-type="bibr" rid="ref22">22</xref>
                </sup>
            </p>
            <p>Parameter Tuning: To improve the performance of a random forest model, it is important to fine-tune its parameters. The three key parameters that significantly impact the tuning performance of the random forest model are the total number of trees (n_estimators), the number of features used for each node segmentation (max_feature), and the maximum depth of a tree (max_depth).
                <sup>
                    <xref ref-type="bibr" rid="ref23">23</xref>
                </sup>
            </p>
            <p>Model Evaluation: After training the random forest model and optimizing its parameters, it is important to evaluate the model&#x2019;s performance. Various evaluation metrics can be used, including accuracy, precision, recall, F1 score, and Area Under the Curve - Receiver Operating Characteristic (AUC-ROC) curve.
                <sup>
                    <xref ref-type="bibr" rid="ref24">24</xref>
                </sup>
            </p>
            <p>Model Implementation: Once the model has been trained and evaluated, it can be deployed for accident severity prediction. The methodology can be designed using Python for building the model and forecasting the severity of road traffic accidents on Indian highways.</p>
            <sec id="sec4">
                <title>Source data</title>
                <p>Data on road accidents from selected stretches of highways was obtained from the Concessionaires of the National Highways Authority of India (NHAI) for two projects: Pune-Solapur and Bengal (BAEL) Section. For the Pune-Solapur Section of NH-9, which is located between km. 144/400 and km. 249/000 in the state of Maharashtra, accident data from 2013 to 2018 were used. For the Six-Laning of Barwa-Adda-Panagarh Section of NH-2, which includes Panagarh Bypass and is located in the States of Jharkhand and West Bengal, accident data from 2015 to 2019 were used for the stretch between km 398.240 and km 521.120. The raw data was subject to exploratory data analysis, as detailed in the following section.</p>
            </sec>
            <sec id="sec5">
                <title>Data Preparation</title>
                <p>In this stage, data gathering and exploration is performed using secondary source data. The dataset consists of 3257 observations, of which 1855 are from the Bengal (BAEL) Section and 1402 are from the Pune-Solapur Section, and 32 variables, including the target variable &#x201c;accident severity.&#x201d; The 32 attributes and their corresponding mappings are presented in 
                    <xref ref-type="table" rid="T1">Table 1</xref>.</p>
                <table-wrap id="T1" orientation="portrait" position="float">
                    <label>Table 1. </label>
                    <caption>
                        <title>Dataset Attributes and Parameters Mapping.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">Sl No</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Attributes</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Mapping</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">1</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Accident Index</td>
                                <td colspan="1" rowspan="1"/>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">2</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Date</td>
                                <td colspan="1" rowspan="1"/>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">3</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Day of Week</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">1-Sunday, 2-Monday, 3-Tuesday, 4-Wednesday, 5- Thursday, 6-Friday, 7-Saturday</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">4</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Time of Accident, Accident Location-A</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">1-Urban, 2-Rural, 3-Unallocated</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">5</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Accident Location-A Chainage-km</td>
                                <td colspan="1" rowspan="1"/>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">6</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Accident Location-A Chainage-km-RoadSide</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">LHS, RHS</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">7-9</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Nature of Accident-B1, Nature of Accident- B2, Nature of Accident-B3</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">1-Overturning, 2-Head on collision, 3-Rear End Collision, 4-Collision Brush/Side Wipe, 5-Right Turn Collision, 6- Skidding, 7a-Others-Hit Cyclist, 7b-Others-Hit Pedestrian, 7c-Others-Hit Parked Vehicle, 7d-Others-Hit Fixed Object, 7e-Others-Wrong Side Driving, 7f-Others-Hit Animal, 7g- Others-Hit Two Wheeler, 7h-Others-Unknown, 7i-Others- Fallen down, 8-Overtaking vehicle, 9-Left Turn Collision</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">10</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Accident Severity -C</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">1-Fatal, 2-Grievous Injury, 3-Minor Injury, 4-No Injury</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">11-13</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Classification of Accident-C1, Classification of Accident-C2, Classification of Accident-C3</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">1-Fatal, 2-Grievous Injury, 3-Minor Injury, 4-Non - Injury (Damage only)</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">14-18</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Causes-D1, Causes-D2, Causes-D3, Causes- D4, Causes-D5</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">1-Drunken, 2-Overspeeding, 3-Vehicle out of control, 4a- Fault of driver of motor vehicle, 4b-Driver of other vehicle, 4c-Cyclist, 4d-Pedestrian, 4e-Passenger, 4f-Animal, 5a- Defect in mechanical condition of motor vehicle, 5b-Road condition</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">19</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Road Feature-E</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">1-Single lane, 2-Two lanes, 3-Three lanes or more without central divider median, 4-Four lanes or more with central divider along with carriageway width</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">20</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Road Condition-F</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">1-Straight Road, 2-Slight Curve, 3-Sharp Curve, 4-Flat Road, 5-Gentle incline, 6-Steep incline, 7-Hump, 8-Dip</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">21</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Intersection Type-G</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">1-T Junction, 2-Y Junction, 3-Four arm junction, 4-Staggered junction, 5-Roundabout, 6-Uncontrolled junction</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">22</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Weather Conditions-H</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">1-Fine, 2-Mist/Fog
                                    <break/>3-Cloud, 4-Light Rain, 5-Heavy Rain, 6-Hail/sleet, 7- Snow, 8-Strong Wind, 9-Dust Storm
                                    <break/>10-Very Hot, 11-Very Cold, 12-Other extraordinary weather condition</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">23-26</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Vehicle Type Involved-J-V1, Vehicle Type Involved-J-V2, Vehicle Type Involved-J-V3, Vehicle Type Involved-J-V4</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">1-Car/Jeep/Van, 2-SUV, 3-Bus, 4-Mini Bus, 5-Truck, 6- Two Wheeler, 7-Three Wheeler, 8-Cycle, 9-Pedestrian, 10- Tractor, 11-Unknown, 12-Animal, 13-Objects, 14-LCV, 15- MAV</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">27</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Number of Vehicles</td>
                                <td colspan="1" rowspan="1"/>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">28</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Number of Casualties-Fatal</td>
                                <td colspan="1" rowspan="1"/>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">29</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Number of Casualties-Grievous Injury</td>
                                <td colspan="1" rowspan="1"/>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">30</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Number of Casualties-Minor Injury</td>
                                <td colspan="1" rowspan="1"/>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">31</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Number of Casualties-Non Injured</td>
                                <td colspan="1" rowspan="1"/>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">32</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Number of Casualties</td>
                                <td colspan="1" rowspan="1"/>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
            </sec>
            <sec id="sec6">
                <title>Data Modelling</title>
                <p>The random forest classification algorithm has been employed in this study to forecast the severity of road traffic accidents in India. This section details the procedure for implementing the model and evaluating its performance, and discusses the results obtained. The random forest algorithm is written in the Python programming language.</p>
                <p>The target variable for the random forest model is selected as &#x2018;Accident Severity&#x2019;, which has the classes Fatal, Grievous Injury, Minor Injury and No Injury, indexed as [1-Fatal, 2-Grievous Injury, 3-Minor Injury, 4-No Injury].</p>
                <p>The dataset is partitioned into training and testing sets with a ratio of 80% and 20%, respectively. The hyperparameters &#x2018;n_estimators&#x2019; and &#x2018;max_depth&#x2019; are specified, and a grid search is conducted with cross-validation (cv=5) to identify the optimal hyperparameters. The best parameters and scores are obtained. The best estimator is fit on the training data. Predictions are made on the test data and the accuracy of the model is obtained.</p>
                <p>The algorithm and programme for Accident Severity Modelling using random forest are written in the Python programming language, and the code is made available to the public for further development. The source code can be accessed via the software availability statement.</p>
                <p>Accuracy analysis on test data: Three metrics were employed to evaluate the effectiveness of the algorithms: accuracy, precision, and recall. These metrics are defined as follows:</p>
                <p>Accuracy: The formula for a metric that measures the proportion of correctly predicted observations to the total number of observations is represented as:
                    <disp-formula id="e1">
                        <mml:math display="block">
                            <mml:mfenced close=")" open="(">
                                <mml:mrow>
                                    <mml:mi>T</mml:mi>
                                    <mml:mspace width="0.25em"/>
                                    <mml:mi>P</mml:mi>
                                    <mml:mo>+</mml:mo>
                                    <mml:mi>T</mml:mi>
                                    <mml:mspace width="0.25em"/>
                                    <mml:mi>N</mml:mi>
                                </mml:mrow>
                            </mml:mfenced>
                            <mml:mo>/</mml:mo>
                            <mml:mfenced close=")" open="(">
                                <mml:mrow>
                                    <mml:mi>T</mml:mi>
                                    <mml:mspace width="0.25em"/>
                                    <mml:mi>P</mml:mi>
                                    <mml:mo>+</mml:mo>
                                    <mml:mi>T</mml:mi>
                                    <mml:mspace width="0.25em"/>
                                    <mml:mi>N</mml:mi>
                                    <mml:mo>+</mml:mo>
                                    <mml:mi>F</mml:mi>
                                    <mml:mspace width="0.25em"/>
                                    <mml:mi>P</mml:mi>
                                    <mml:mo>+</mml:mo>
                                    <mml:mi mathvariant="italic">FN</mml:mi>
                                </mml:mrow>
                            </mml:mfenced>
                        </mml:math>
                    </disp-formula>
                </p>
                <p>Precision is a metric that indicates the ratio of correctly predicted positive observations to the total number of predicted positive observations, and is calculated using the formula:
                    <disp-formula id="e2">
                        <mml:math display="block">
                            <mml:mi>T</mml:mi>
                            <mml:mspace width="0.25em"/>
                            <mml:mi>P</mml:mi>
                            <mml:mo>/</mml:mo>
                            <mml:mfenced close=")" open="(">
                                <mml:mrow>
                                    <mml:mi>T</mml:mi>
                                    <mml:mspace width="0.25em"/>
                                    <mml:mi>P</mml:mi>
                                    <mml:mo>+</mml:mo>
                                    <mml:mi>F</mml:mi>
                                    <mml:mspace width="0.25em"/>
                                    <mml:mi>P</mml:mi>
                                </mml:mrow>
                            </mml:mfenced>
                        </mml:math>
                    </disp-formula>
                </p>
                <p>Recall is a metric that reflects the ratio of correctly predicted positive observations to the total number of actual positive observations, and is determined using the formula:
                    <disp-formula id="e3">
                        <mml:math display="block">
                            <mml:mi>T</mml:mi>
                            <mml:mspace width="0.25em"/>
                            <mml:mi>P</mml:mi>
                            <mml:mo>/</mml:mo>
                            <mml:mfenced close=")" open="(">
                                <mml:mrow>
                                    <mml:mi>T</mml:mi>
                                    <mml:mspace width="0.25em"/>
                                    <mml:mi>P</mml:mi>
                                    <mml:mo>+</mml:mo>
                                    <mml:mi mathvariant="italic">FN</mml:mi>
                                </mml:mrow>
                            </mml:mfenced>
                        </mml:math>
                    </disp-formula>
                </p>
            </sec>
        </sec>
        <sec id="sec7" sec-type="result|discussion">
            <title>Result and Discussion</title>
            <sec id="sec8">
                <title>Model Performance</title>
                <p>The classification model used three hyperparameters - &#x2018;max_depth&#x2019;: 10, &#x2018;max_features&#x2019;: &#x2018;sqrt&#x2019;, and &#x2018;n_estimators&#x2019;: 100, and the results generated a confusion matrix for the training set. The matrix indicated the number of correctly and incorrectly classified instances for each class. The classification report provided precision, recall, and f1-score for each class, along with support. The model showed high precision and recall for class 1 but low precision and recall for classes 2, 3, and 4, with an overall accuracy of 67% and a weighted average f1-score of 0.64 on the training set. The macro average f1-score, which assigns equal weight to each class, was 0.53.</p>
                <p>The optimal parameters for a random forest classifier model were determined through a grid search, with a max depth of 2, n estimators of 5000, and a random state of 0. The model was then applied to the test data, and the predictions were saved in an Excel file called &#x201c;predicted output3.xls&#x201d; for further analysis. The accuracy of the model on the test data was determined to be 0.4147, or approximately 41.47%, indicating that it accurately predicted the severity of traffic accidents in about 41.47% of test cases.</p>
                <p>
                    <bold>Predicted outputs</bold>
                </p>
                <p>
                    <bold>Comparative analysis of observed and predicted accident severity index against dates</bold>
                </p>
                <p>The actual accident severity indices are represented by the observed values, while the predicted values are generated by the random forest model using the input features.</p>
                <p>The following is a summary (
                    <xref ref-type="fig" rid="f3">Figure 3</xref>) of the comparison between the observed and predicted values:</p>
                <fig fig-type="figure" id="f3" orientation="portrait" position="float">
                    <label>Figure 3. </label>
                    <caption>
                        <title>Comparison between Observed and Predicted Accident Severity Index.</title>
                    </caption>
                    <graphic id="gr3" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/146596/53f6d316-1660-4fb2-ac64-0d5b032416c2_figure3.gif"/>
                </fig>
                <p>On dates such as 25-02-2017, 17-04-2017, and 22-04-2017, the random forest model accurately predicts the accident severity index.</p>
                <p>In a number of instances, the model predicts a lower accident severity index value than the observed value, for example on 18-02-2017, 23-02-2017, and 27-03-2017.</p>
                <p>Occasionally, the model overestimates the accident severity index by predicting a higher value than the observed value, as on 24-05-2017 and 20-10-2017.</p>
                <p>In general, the model frequently predicts a severity index of 2 for accidents, even when the observed values are distinct. This may indicate a bias in the model, possibly as a result of an imbalance in the training dataset, in which severity index 2 occurs more frequently than other categories.</p>
                <p>
                    <bold>Comparative analysis of observed and predicted accident severity index against time</bold>
                </p>
                <p>
                    <xref ref-type="fig" rid="f4">Figure 4</xref> displays the date, day of the week, and time of the accident, as well as the observed and predicted accident severity indices. The plot for the 165 rows of predicted data does not fit on an A4 sheet; hence the data is published and the link is provided in the Tableau graphs visuals availability [i].</p>
                <fig fig-type="figure" id="f4" orientation="portrait" position="float">
                    <label>Figure 4. </label>
                    <caption>
                        <title>Comparative analysis of observed and predicted accident severity index against time.</title>
                    </caption>
                    <graphic id="gr4" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/146596/53f6d316-1660-4fb2-ac64-0d5b032416c2_figure4.gif"/>
                </fig>
                <p>The dataset contains accident data from February 18, 2017 to December 31, 2017, as determined by Tableau analysis of the plot generated from the provided Excel table.</p>
                <p>The observed accident severity index ranges from 1 to 4, where 1 corresponds to the least severe accident and 4 to the most severe accident.</p>
                <p>The observed severity index for the vast majority of accidents in the dataset is 3, followed by 4. 2 indicates a less severe accident, while 4 indicates a more severe accident.</p>
                <p>The majority of accidents within the dataset have a predicted severity index of 2, followed by an index of 1.</p>
                <p>The analysis of the scatter plot reveals that the predicted severity index is typically lower than the observed severity index. This suggests that the model used to predict the severity of accidents is not always accurate and could be improved.</p>
            </sec>
            <sec id="sec9">
                <title>Comparative analysis of observed and predicted accident severity index against Location and Chainages- RHS</title>
                <p>The Tableau plot (
                    <xref ref-type="fig" rid="f5">Figure 5</xref>) presents a detailed visual analysis of accident data on the right-hand side of the road. The data is organized by date and day of the week, displaying the accident location, observed accident severity index, and predicted accident severity index for each incident. The plot effectively illustrates the spatial distribution of accidents and their severity over time, enabling the identification of patterns and trends. The Tableau plot does not fit on an A4 sheet; hence the data is published and the link is provided in the Tableau graphs visuals availability [ii].</p>
                <fig fig-type="figure" id="f5" orientation="portrait" position="float">
                    <label>Figure 5. </label>
                    <caption>
                        <title>Comparative analysis of observed and predicted accident severity index against Location and Chainages-RHS.</title>
                    </caption>
                    <graphic id="gr5" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/146596/53f6d316-1660-4fb2-ac64-0d5b032416c2_figure5.gif"/>
                </fig>
                <p>It is evident from the analysis that the majority of accidents have an observed severity index of 2 or 3, indicating a moderate severity. However, the predicted accident severity index largely remains at 2, indicating that the predictions may be somewhat conservative and do not fully capture the observed severity range.</p>
                <p>In addition, there appears to be no correlation between the day of the week and the frequency or severity of accidents across the different days of the week. This may suggest that external factors, such as traffic patterns or weather conditions, have a greater impact on the occurrence and severity of accidents than the day of the week.</p>
            </sec>
            <sec id="sec10">
                <title>Comparative analysis of observed and predicted accident severity index against Location and Chainages- LHS</title>
                <p>The graph (
                    <xref ref-type="fig" rid="f6">Figure 6</xref>) displays the date, day of the week, and accident location on the Left Hand Side (LHS) of the road, as well as the observed and predicted accident severity indices. The plot of the predicted data does not fit on an A4 sheet; hence the data is published and the link is provided in the Tableau graphs visuals availability [iii].</p>
                <fig fig-type="figure" id="f6" orientation="portrait" position="float">
                    <label>Figure 6. </label>
                    <caption>
                        <title>Comparative analysis of observed and predicted accident severity index against Location and Chainages-LHS.</title>
                    </caption>
                    <graphic id="gr6" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/146596/53f6d316-1660-4fb2-ac64-0d5b032416c2_figure6.gif"/>
                </fig>
                <p>The scatterplot reveals that the majority of accidents on the left side of the road had a severity index of 2 or 3, with only a few instances of severity index 1 and 4. This indicates that the majority of collisions on the left side of the road were of moderate severity.</p>
                <p>In the majority of cases, the predicted accident severity index was 2, with only a few instances of values 3 and 4. This suggests that the predictive model may be biased towards predicting less severe accidents.</p>
                <p>There was no discernible pattern or trend between the day of the week and the occurrence of accidents. Accidents appeared to occur every day, indicating that the day of the week may not be a significant predictor of accident severity on the left side of the road.</p>
                <p>The accident locations, as measured by Accident Location A Chainage km, were scattered along the roadway at various distances. This suggests that there may not be a particular accident hotspot or concentration on the left-hand side of the road.</p>
            </sec>
            <sec id="sec11">
                <title>Data Recording and availability</title>
                <p>The recording of road accident data in India must comply with the MoRTH &amp; IRC guidelines, utilizing the Road Accident Recording and Reporting Formats. Despite this, there exists a need for a more advanced data recording system to effectively model road safety. The digital monitoring of road accidents can increase the frequency of data collection and minimize the absence of crucial information. Often, the lack of a system or individual to document the accident leads to the absence of important road accident data. This missing data can be regained through the use of machine learning, thus enhancing the accuracy of road safety modeling.</p>
            </sec>
        </sec>
        <sec id="sec12" sec-type="conclusion">
            <title>Conclusion</title>
            <p>The random forest classifier model predicted the severity of traffic accidents with an overall accuracy of 67% on the training set and approximately 41.47% on the test set. Indicating possible bias or imbalance in the training dataset, the model tended to predict a lower severity index than the observed values. There were no discernible relationships between the day of the week and the occurrence or severity of accidents. The performance of the model can be enhanced by correcting the dataset imbalance and refining the model&#x2019;s hyperparameters.</p>
            <p>The observed and predicted accident severity indices were compared against a number of variables, including dates, times, and locations on both sides of the road. In some instances, the model accurately predicted the accident severity index, but it frequently underestimated accident severity. No discernible patterns or trends were observed in terms of accident location, indicating that external factors may have a greater influence on the occurrence and severity of accidents.</p>
            <p>To improve road safety modelling, it is essential to adopt a more sophisticated data recording system consistent with MoRTH and IRC recommendations. Digital monitoring of road accidents can increase the frequency of data collection and reduce the loss of vital information. Integrating machine learning techniques can contribute to more effective interventions and decision-making in the field of traffic accident prevention and mitigation.</p>
        </sec>
        <sec id="sec13">
            <title>Future Scope</title>
            <p>The study presented provides a good starting point for future research in the field of road safety modeling and accident prevention for Indian highways. However, the limitations of the present study open up potential areas for future research, listed below, which will be taken up in continuation of this work.</p>
            <p>Dataset improvement: The study identified the possibility of dataset bias and imbalance affecting model performance. Future research will focus on improving the quality and quantity of data, reducing bias and improving model performance. This will involve exploring alternative data sources, enhancing data collection methods, and addressing data quality issues.</p>
            <p>Model improvement: The study used the random forest algorithm to develop a predictive model for traffic accident severity. In future research, other machine learning algorithms or ensemble models to improve model performance will be explored. Additionally, refining hyperparameters and addressing dataset imbalance will be done to improve model accuracy.</p>
            <p>External factors analysis: The study highlighted the influence of external factors on accident severity prediction. Future research can focus on exploring the impact of external factors such as weather conditions, road infrastructure, and driver behavior on accident severity. This can enhance the accuracy of predictive models and inform decision-making in accident prevention efforts.</p>
            <p>Real-time monitoring: The study highlighted the need for a sophisticated data recording system in line with MoRTH and IRC guidelines. Future research can focus on developing a real-time monitoring system that can capture road safety data in real-time and provide insights for accident prevention efforts.</p>
        </sec>
    </body>
    <back>
        <sec id="sec18" sec-type="data-availability">
            <title>Data availability</title>
            <p>Zenodo. Data for Accident Severity Prediction Modelling for Indian Highways Case Study, 
                <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.5281/zenodo.7773156">https://doi.org/10.5281/zenodo.7773156</ext-link>.
                <sup>

                    <xref ref-type="bibr" rid="ref25">25</xref>
</sup>
            </p>
            <p>This project contains the following underlying data:
                <list list-type="bullet">
                    <list-item>
                        <label>&#x2022;</label>
                        <p>
Accdataset_hk_PS_BAEL_Combined.csv (The dataset consists of 3257 observations, of which 1855 observations are of the Bengal (BAEL) Section and 1402 observations are of Pune-Solapur.)</p>
                    </list-item>
                    <list-item>
                        <label>&#x2022;</label>
                        <p>
predicted_output_1.xlsx (This is level-2 processed data derived from raw accident data using prediction modeling. The data has been indexed from 1 to 4 for further analysis, and there are a total of 165 rows in the predicted output observations.)
</p>
                    </list-item>
                </list>
            </p>
            <p>Data are available under the terms of the 
                <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/legalcode">Creative Commons Attribution 4.0 International license</ext-link> (CC-BY 4.0).</p>
        </sec>
        <sec id="sec14">
            <title>Software availability</title>
            <p>

                <list list-type="bullet">
                    <list-item>
                        <label>&#x2022;</label>
                        <p>Github: 
                            <ext-link ext-link-type="uri" xlink:href="https://github.com/humera-k/RF_Accident_Severity">https://github.com/humera-k/RF_Accident_Severity
</ext-link>
                        </p>
                    </list-item>
                    <list-item>
                        <label>&#x2022;</label>
                        <p>

                            <ext-link ext-link-type="uri" xlink:href="https://zenodo.org/badge/latestdoi/616376786">https://zenodo.org/badge/latestdoi/616376786</ext-link>
</p>
                    </list-item>
                </list>
            </p>
            <p>Data are available under the terms of the 
                <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/legalcode">Creative Commons Attribution 4.0 International license</ext-link> (CC-BY 4.0).</p>
        </sec>
        <sec id="sec15">
            <title>Tableau graphs visuals availability</title>
            <p>

                <list list-type="order">
                    <list-item>
                        <label>1.</label>
                        <p>

                            <ext-link ext-link-type="uri" xlink:href="https://public.tableau.com/app/profile/humera.khanum/viz/Accidental_Analysis_1/Sheet52">
Accidental_Analysis_1 | Tableau Public</ext-link> (Comparative analysis of observed and predicted accident severity index against time)</p>
                    </list-item>
                    <list-item>
                        <label>2.</label>
                        <p>

                            <ext-link ext-link-type="uri" xlink:href="https://public.tableau.com/app/profile/humera.khanum/viz/Accidental_Analysis_1/Sheet3">
Accidental_Analysis_1
</ext-link> (Comparative analysis of observed and predicted accident severity index against Location and Chainages-Right hand Side (RHS))</p>
                    </list-item>
                    <list-item>
                        <label>3.</label>
                        <p>

                            <ext-link ext-link-type="uri" xlink:href="https://public.tableau.com/views/Accidental_Analysis_1/Sheet4?:language=en-US&amp;publish=yes&amp;:display_count=n&amp;:origin=viz_share_link">
Accidental_Analysis_1
</ext-link> (Comparative analysis of observed and predicted accident severity index against Location and Chainages-Left Hand Side (LHS))
</p>
                    </list-item>
                </list>
            </p>
        </sec>
        <ack>
            <title>Acknowledgement</title>
            <p>We are grateful to the National Highways Authority of India and IL&amp;FS Engineering and Construction Company for making the raw accident data available.</p>
        </ack>
        <ref-list>
            <title>References</title>
            <ref id="ref1">
                <label>1</label>
                <mixed-citation publication-type="other">
                    <collab>World Health Organization</collab>:
                    <article-title>Global Status Report on Road Safety 2018.</article-title>
                    <ext-link ext-link-type="uri" xlink:href="https://www.who.int/publications/i/item/9789241565684">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref2">
                <label>2</label>
                <mixed-citation publication-type="other">
                    <collab>Ministry of Road Transport &amp; Highways</collab>:
                    <article-title>Road Accidents in India 2019.</article-title>
                    <ext-link ext-link-type="uri" xlink:href="https://morth.nic.in/road-accident-in-india">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref3">
                <label>3</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Mannering</surname>
                            <given-names>F</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Bhat</surname>
                            <given-names>C</given-names>
                        </name>
</person-group>:
                    <source>

                        <italic toggle="yes">Statistical Methods for Analyzing Highway Accident Data.</italic>
</source>
                    <publisher-name>John Wiley &amp; Sons</publisher-name>;<year>2014</year>.</mixed-citation>
            </ref>
            <ref id="ref4">
                <label>4</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Barbosa</surname>
                            <given-names>P</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Andrade</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ferreira</surname>
                            <given-names>S</given-names>
                        </name>
</person-group>:
                    <article-title>Machine learning applied to road safety modeling: A systematic literature review.</article-title>
                    <source>

                        <italic toggle="yes">Journal of Traffic and Transportation Engineering (English Edition).</italic>
</source>
                    <year>2020</year>;<volume>7</volume>(<issue>6</issue>):<fpage>775</fpage>&#x2013;<lpage>790</lpage>.
                    <pub-id pub-id-type="doi">10.1016/j.jtte.2020.07.004</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref5">
                <label>5</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Al-Mistarehi</surname>
                            <given-names>BW</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Alomari</surname>
                            <given-names>AH</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Imam</surname>
                            <given-names>R</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Using Machine Learning Models to Forecast Severity Level of Traffic Crashes by R Studio and ArcGIS.</article-title>
                    <source>

                        <italic toggle="yes">Front. Built Environ.</italic>
</source>
                    <year>2022</year>;<volume>8</volume>(<issue>April</issue>):<fpage>1</fpage>&#x2013;<lpage>14</lpage>.
                    <pub-id pub-id-type="doi">10.3389/fbuil.2022.860805</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref6">
                <label>6</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Moghaddam</surname>
                            <given-names>FR</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Afandizadeh</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ziyadi</surname>
                            <given-names>M</given-names>
                        </name>
</person-group>:
                    <article-title>Prediction of accident severity using artificial neural networks.</article-title>
                    <source>

                        <italic toggle="yes">Int. J. Civ. Eng.</italic>
</source>
                    <year>2011</year>;<volume>9</volume>(<issue>1</issue>):<fpage>41</fpage>&#x2013;<lpage>48</lpage>.</mixed-citation>
            </ref>
            <ref id="ref7">
                <label>7</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Mohamed</surname>
                            <given-names>AA</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Mannering</surname>
                            <given-names>FL</given-names>
                        </name>
</person-group>:
                    <chapter-title>Probabilistic models of traffic accident occurrence and severity.</chapter-title>
                    <source>

                        <italic toggle="yes">Handbook of traffic psychology.</italic>
</source>
                    <publisher-name>Elsevier</publisher-name>;<year>2010</year>; (pp.<fpage>129</fpage>&#x2013;<lpage>143</lpage>).
                    <pub-id pub-id-type="doi">10.1016/B978-0-12-381984-0.00010-5</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref8">
                <label>8</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Hoang Long</surname>
                            <given-names>V</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ahmed</surname>
                            <given-names>K</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ma</surname>
                            <given-names>W</given-names>
                        </name>
</person-group>:
                    <article-title>A Random Forest Approach to Predicting Traffic Accident Severity.</article-title>
                    <source>

                        <italic toggle="yes">IEEE Access.</italic>
</source>
                    <year>2021</year>;<volume>9</volume>:<fpage>1219</fpage>&#x2013;<lpage>1232</lpage>.
                    <pub-id pub-id-type="doi">10.1109/ACCESS.2021.3098040</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref9">
                <label>9</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Sze</surname>
                            <given-names>NN</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Wong</surname>
                            <given-names>SC</given-names>
                        </name>
</person-group>:
                    <article-title>Diagnostic analysis of the logistic model for pedestrian injury severity in traffic crashes.</article-title>
                    <source>

                        <italic toggle="yes">Accid. Anal. Prev.</italic>
</source>
                    <year>2007</year>;<volume>39</volume>(<issue>6</issue>):<fpage>1267</fpage>&#x2013;<lpage>1278</lpage>.
                    <pub-id pub-id-type="pmid">17920851</pub-id>
                    <pub-id pub-id-type="doi">10.1016/j.aap.2007.03.017</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref10">
                <label>10</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Abdel-Aty</surname>
                            <given-names>M</given-names>
                        </name>
</person-group>:
                    <article-title>Analysis of driver injury severity levels at multiple locations using ordered probit models.</article-title>
                    <source>

                        <italic toggle="yes">J. Saf. Res.</italic>
</source>
                    <year>2003</year>;<volume>34</volume>:<fpage>597</fpage>&#x2013;<lpage>603</lpage>.
                    <pub-id pub-id-type="pmid">14733994</pub-id>
                    <pub-id pub-id-type="doi">10.1016/j.jsr.2003.05.009</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref11">
                <label>11</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Shiran</surname>
                            <given-names>G</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Imaninasab</surname>
                            <given-names>R</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Khayamim</surname>
                            <given-names>R</given-names>
                        </name>
</person-group>:
                    <article-title>Crash Severity Analysis of Highways Based on Multinomial Logistic Regression Model, Decision Tree Techniques, and Artificial Neural Network: A Modeling Comparison.</article-title>
                    <source>

                        <italic toggle="yes">Sustainability.</italic>
</source>
                    <year>2021</year>;<volume>13</volume>(<issue>10</issue>):<fpage>5670</fpage>.
                    <pub-id pub-id-type="doi">10.3390/su13105670</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref12">
                <label>12</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Yan</surname>
                            <given-names>L</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Liao</surname>
                            <given-names>W</given-names>
                        </name>
</person-group>:
                    <article-title>Evolutionary hyperparameter optimization for random forest.</article-title>
                    <source>

                        <italic toggle="yes">J. Ambient. Intell. Humaniz. Comput.</italic>
</source>
                    <year>2019</year>;<volume>10</volume>(<issue>7</issue>):<fpage>2801</fpage>&#x2013;<lpage>2810</lpage>.
                    <pub-id pub-id-type="doi">10.1007/s12652-018-0877-6</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref13">
                <label>13</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Snoek</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Larochelle</surname>
                            <given-names>H</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Adams</surname>
                            <given-names>RP</given-names>
                        </name>
</person-group>:
                    <article-title>Practical Bayesian optimization of machine learning algorithms.</article-title>
                    <source>

                        <italic toggle="yes">Adv. Neural Inf. Proces. Syst.</italic>
</source>
                    <year>2012</year>;<fpage>2951</fpage>&#x2013;<lpage>2959</lpage>.</mixed-citation>
            </ref>
            <ref id="ref14">
                <label>14</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Singh</surname>
                            <given-names>G</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Kumar</surname>
                            <given-names>A</given-names>
                        </name>
</person-group>:
                    <article-title>Random forest-based prediction model for traffic accident severity on Indian highways.</article-title>
                    <source>

                        <italic toggle="yes">Journal of Traffic and Transportation Engineering (English Edition).</italic>
</source>
                    <year>2021</year>;<volume>8</volume>(<issue>6</issue>):<fpage>693</fpage>&#x2013;<lpage>706</lpage>.
                    <pub-id pub-id-type="doi">10.1016/j.jtte.2021.05.012</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref15">
                <label>15</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Patel</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Patel</surname>
                            <given-names>R</given-names>
                        </name>
</person-group>:
                    <article-title>A study on causes of road accidents in India.</article-title>
                    <source>

                        <italic toggle="yes">Int. J. Eng. Res. Appl.</italic>
</source>
                    <year>2013</year>;<volume>3</volume>(<issue>6</issue>):<fpage>1386</fpage>&#x2013;<lpage>1391</lpage>.</mixed-citation>
            </ref>
            <ref id="ref16">
                <label>16</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Yan</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Shen</surname>
                            <given-names>Y</given-names>
                        </name>
</person-group>:
                    <article-title>Traffic Accident Severity Prediction Based on Random Forest.</article-title>
                    <source>

                        <italic toggle="yes">Sustainability (Switzerland).</italic>
</source>
                    <year>2022</year>;<volume>14</volume>(<issue>3</issue>):<fpage>1729</fpage>.
                    <pub-id pub-id-type="doi">10.3390/su14031729</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref17">
                <label>17</label>
                <mixed-citation publication-type="other">
                    <collab>Indian Road Congress</collab>:
                    <source>

                        <italic toggle="yes">Guidelines for the Design of At-Grade Intersections on Rural Highways.</italic>
</source>
                    <publisher-loc>New Delhi</publisher-loc>:<year>2012</year>.</mixed-citation>
            </ref>
            <ref id="ref18">
                <label>18</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Ramanujam</surname>
                            <given-names>V</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Bhalla</surname>
                            <given-names>K</given-names>
                        </name>
</person-group>:
                    <article-title>Speeding on Indian roads: A survey of Indian drivers.</article-title>
                    <source>

                        <italic toggle="yes">Accid. Anal. Prev.</italic>
</source>
                    <year>2009</year>;<volume>41</volume>(<issue>3</issue>):<fpage>527</fpage>&#x2013;<lpage>532</lpage>.
                    <pub-id pub-id-type="doi">10.1016/j.aap.2009.01.009</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref19">
                <label>19</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Joshi</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Shaikh</surname>
                            <given-names>T</given-names>
                        </name>
</person-group>:
                    <article-title>Animal-related crashes on national highways in India.</article-title>
                    <source>

                        <italic toggle="yes">Traffic Inj. Prev.</italic>
</source>
                    <year>2017</year>;<volume>18</volume>(<issue>2</issue>):<fpage>120</fpage>&#x2013;<lpage>124</lpage>.
                    <pub-id pub-id-type="doi">10.1080/15389588.2016.1213836</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref20">
                <label>20</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Cutler</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Cutler</surname>
                            <given-names>DR</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Stevens</surname>
                            <given-names>JR</given-names>
                        </name>
</person-group>:
                    <article-title>Random Forests.</article-title>
                    <year>2012</year>.
                    <pub-id pub-id-type="doi">10.1007/978-1-4419-9326-7_5</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref21">
                <label>21</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Breiman</surname>
                            <given-names>L</given-names>
                        </name>
</person-group>:
                    <article-title>Random forests.</article-title>
                    <source>

                        <italic toggle="yes">Mach. Learn.</italic>
</source>
                    <year>2001</year>;<volume>45</volume>(<issue>1</issue>):<fpage>5</fpage>&#x2013;<lpage>32</lpage>.
                    <pub-id pub-id-type="doi">10.1023/A:1010933404324</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref22">
                <label>22</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Liaw</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Wiener</surname>
                            <given-names>M</given-names>
                        </name>
</person-group>:
                    <article-title>Classification and regression by randomForest.</article-title>
                    <source>

                        <italic toggle="yes">R News.</italic>
</source>
                    <year>2002</year>;<volume>2</volume>(<issue>3</issue>):<fpage>18</fpage>&#x2013;<lpage>22</lpage>.</mixed-citation>
            </ref>
            <ref id="ref23">
                <label>23</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>El-Basyouny</surname>
                            <given-names>K</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Sayed</surname>
                            <given-names>T</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Abdel-Aty</surname>
                            <given-names>M</given-names>
                        </name>
</person-group>:
                    <article-title>Predicting accident occurrence and severity on arterials using random parameter and random effect models.</article-title>
                    <source>

                        <italic toggle="yes">Accid. Anal. Prev.</italic>
</source>
                    <year>2010</year>;<volume>42</volume>(<issue>3</issue>):<fpage>718</fpage>&#x2013;<lpage>727</lpage>.</mixed-citation>
            </ref>
            <ref id="ref24">
                <label>24</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Sokolova</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Lapalme</surname>
                            <given-names>G</given-names>
                        </name>
</person-group>:
                    <article-title>A systematic analysis of performance measures for classification tasks.</article-title>
                    <source>

                        <italic toggle="yes">Inf. Process. Manag.</italic>
</source>
                    <year>2009</year>;<volume>45</volume>(<issue>4</issue>):<fpage>427</fpage>&#x2013;<lpage>437</lpage>.
                    <pub-id pub-id-type="doi">10.1016/j.ipm.2009.03.002</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref25">
                <label>25</label>
                <mixed-citation publication-type="data">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Khanum</surname>
                            <given-names>H</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Garg</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Faheem</surname>
                            <given-names>MI</given-names>
                        </name>
</person-group>:
                    <data-title>Data for Accident Severity Prediction Modelling for Indian Highways Case Study (Accidentdata_V1).</data-title>[Data set].
                    <source>

                        <italic toggle="yes">Zenodo.</italic>
</source>
                    <year>2023</year>.
                    <pub-id pub-id-type="doi">10.5281/zenodo.7773156</pub-id>
                </mixed-citation>
            </ref>
        </ref-list>
    </back>
    <sub-article article-type="reviewer-report" id="report173654">
        <front-stub>
            <article-id pub-id-type="doi">10.5256/f1000research.146596.r173654</article-id>
            <title-group>
                <article-title>Reviewer response for version 1</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author">
                    <name>
                        <surname>Sinha</surname>
                        <given-names>Sanjeev</given-names>
                    </name>
                    <xref ref-type="aff" rid="r173654a1">1</xref>
                    <role>Referee</role>
                </contrib>
                <aff id="r173654a1">
                    <label>1</label>Department of Civil Engineering, National Institute of Technology Patna, Patna, Bihar, India</aff>
            </contrib-group>
            <author-notes>
                <fn fn-type="conflict">
                    <p>
                        <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>5</day>
                <month>10</month>
                <year>2023</year>
            </pub-date>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2023 Sinha S</copyright-statement>
                <copyright-year>2023</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access peer review report distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <related-article ext-link-type="doi" id="relatedArticleReport173654" related-article-type="peer-reviewed-article" xlink:href="10.12688/f1000research.133594.1"/>
            <custom-meta-group>
                <custom-meta>
                    <meta-name>recommendation</meta-name>
                    <meta-value>approve-with-reservations</meta-value>
                </custom-meta>
            </custom-meta-group>
        </front-stub>
        <body>
            <p>The work is clearly and accurately presented; however, the motivation behind the work and its academic contribution need to be highlighted.&#x00a0;&#x00a0;</p>
            <p> </p>
            <p> Though relevant literature has been cited, it has not been described for its application in the present study. Further, some of the literature does not seem to be correctly referred to and cited. For example, references 7, 14 and 15 need to be re-checked.&#x00a0;&#x00a0;&#x00a0;</p>
            <p> </p>
            <p> The title of the work is development of prediction modeling for accident severity; the choice of factors, their contribution and the transferability of the developed model need to be highlighted.</p>
            <p> </p>
            <p> The literature review shows several works related to the topic; thus, the novelty of the work also needs to be highlighted.</p>
            <p>Is the work clearly and accurately presented and does it cite the current literature?</p>
            <p>Partly</p>
            <p>If applicable, is the statistical analysis and its interpretation appropriate?</p>
            <p>Partly</p>
            <p>Are all the source data underlying the results available to ensure full reproducibility?</p>
            <p>Partly</p>
            <p>Is the study design appropriate and is the work technically sound?</p>
            <p>Yes</p>
            <p>Are the conclusions drawn adequately supported by the results?</p>
            <p>Partly</p>
            <p>Are sufficient details of methods and analysis provided to allow replication by others?</p>
            <p>Partly</p>
            <p>Reviewer Expertise:</p>
            <p>Traffic Engineering, Transportation Planning, Highway Materials, Logistics</p>
            <p>I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard, however I have significant reservations, as outlined above.</p>
        </body>
        <sub-article article-type="response" id="comment10380-173654">
            <front-stub>
                <contrib-group>
                    <contrib contrib-type="author">
                        <name>
                            <surname>Khanum</surname>
                            <given-names>Humera</given-names>
                        </name>
                    </contrib>
                </contrib-group>
                <author-notes>
                    <fn fn-type="conflict">
                        <p>
                            <bold>Competing interests: </bold>NO COMPETING INTERESTS</p>
                    </fn>
                </author-notes>
                <pub-date pub-type="epub">
                    <day>12</day>
                    <month>10</month>
                    <year>2023</year>
                </pub-date>
            </front-stub>
            <body>
                <p>
                    <bold>Comment 1:&#x00a0;</bold>The work is clearly and accurately presented however, the motivation behind the work and its academic contribution needs to be highlighted.</p>
                <p> 
                    <bold>Author's Response:&#x00a0;</bold>Thank you for your constructive feedback. We agree that highlighting the motivation and academic contribution of this work is crucial for a well-rounded understanding of the study. In the revised manuscript, we have included a section in the Introduction where we elaborate on the motivation behind this work, particularly focusing on the rising number of road accidents and the necessity to develop predictive models to understand and mitigate accident severity. Additionally, we have outlined the academic contribution of our study in the Discussion section, emphasizing the novelty and practical implications of our findings.</p>
                <p> </p>
                <p> 
                    <bold>Comment 2:&#x00a0;</bold>Though relevant literature has been cited, it has not been described for its application in the present study. Further, there is some literature which seems not to be correctly referred and cited. As example the references, 7, 14 and 15 needs to be re-checked.&#x00a0;&#x00a0;</p>
                <p> 
                    <bold>Author's Response:&#x00a0;</bold>We appreciate your attention to detail. We have revisited the cited literature, especially references 7, 14, and 15, and corrected the citations as suggested. Moreover, we have expanded the section to better explain the relevance and application of the cited works in the context of our study.</p>
                <p> </p>
                <p> 
                    <bold>Comment 3:&#x00a0;</bold>The title of the work is Development of prediction modeling for accident severity, the choice of factors, their contribution and transferability of the model developed needs to be highlighted.</p>
                <p> 
                    <bold>Author's Response:&#x00a0;</bold>Thank you for your suggestion. In the revised manuscript, we have dedicated a subsection within the Methodology section to discuss the choice of factors, their contribution, and the transferability of the developed model. &#x00a0;</p>
                <p> </p>
                <p> 
                    <bold>Comment 4:&#x00a0;</bold>The literature review shows several works related to the topic thus novelty of the work also need to be highlighted.</p>
                <p> 
                    <bold>Author's Response:&#x00a0;</bold>We value your input on highlighting the novelty of our work. In the revised manuscript, we have included a paragraph at the end in the conclusion section where we delineate the novelty of our study in comparison to existing literature. We have emphasized the innovative aspects of our predictive model and how our work contributes uniquely to the understanding and prediction of accident severity.</p>
            </body>
        </sub-article>
    </sub-article>
    <sub-article article-type="reviewer-report" id="report202786">
        <front-stub>
            <article-id pub-id-type="doi">10.5256/f1000research.146596.r202786</article-id>
            <title-group>
                <article-title>Reviewer response for version 1</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author">
                    <name>
                        <surname>Zubaidi</surname>
                        <given-names>Hamsa</given-names>
                    </name>
                    <xref ref-type="aff" rid="r202786a1">1</xref>
                    <role>Referee</role>
                    <uri content-type="orcid">https://orcid.org/0000-0003-1311-2098</uri>
                </contrib>
                <aff id="r202786a1">
                    <label>1</label>Roads and Transport Engineering Department, University of Al-Qadisiyah, Diwaniyah, Al-Q&#x0101;disiyyah Governorate, Iraq</aff>
            </contrib-group>
            <author-notes>
                <fn fn-type="conflict">
                    <p>
                        <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>18</day>
                <month>9</month>
                <year>2023</year>
            </pub-date>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2023 Zubaidi H</copyright-statement>
                <copyright-year>2023</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access peer review report distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <related-article ext-link-type="doi" id="relatedArticleReport202786" related-article-type="peer-reviewed-article" xlink:href="10.12688/f1000research.133594.1"/>
            <custom-meta-group>
                <custom-meta>
                    <meta-name>recommendation</meta-name>
                    <meta-value>approve-with-reservations</meta-value>
                </custom-meta>
            </custom-meta-group>
        </front-stub>
        <body>
            <p>
                <list list-type="order">
                    <list-item>
                        <p>The author mentioned "accident" in the paper. "Accident" is a broader term that implies an unintentional or unforeseen incident. It can encompass various events, including crashes, collisions, or other incidents resulting from human error, mechanical failure, or environmental factors. "Crash" is a more specific term that typically refers to a collision or impact between vehicles, objects, or individuals. It often implies a sudden and forceful event.</p>
                        <p> </p>
                        <p> Ultimately, the choice between "accident" and "crash" should be based on the specific details and nature of the event being described in the paper; on that basis, "accident" should be replaced by "crash".</p>
                    </list-item>
                    <list-item>
                        <p>The author should add a random forest formula in the methodology.</p>
                    </list-item>
                    <list-item>
                        <p>It might be better if there is a Gini impurity test in the paper to understand the importance of explanatory variables.</p>
                    </list-item>
                </list>
            </p>
            <p>Is the work clearly and accurately presented and does it cite the current literature?</p>
            <p>Yes</p>
            <p>If applicable, is the statistical analysis and its interpretation appropriate?</p>
            <p>Yes</p>
            <p>Are all the source data underlying the results available to ensure full reproducibility?</p>
            <p>Partly</p>
            <p>Is the study design appropriate and is the work technically sound?</p>
            <p>Yes</p>
            <p>Are the conclusions drawn adequately supported by the results?</p>
            <p>Partly</p>
            <p>Are sufficient details of methods and analysis provided to allow replication by others?</p>
            <p>Partly</p>
            <p>Reviewer Expertise:</p>
            <p>Transportation Safety</p>
            <p>I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard, however I have significant reservations, as outlined above.</p>
        </body>
        <sub-article article-type="response" id="comment10315-202786">
            <front-stub>
                <contrib-group>
                    <contrib contrib-type="author">
                        <name>
                            <surname>Khanum</surname>
                            <given-names>Humera</given-names>
                        </name>
                    </contrib>
                </contrib-group>
                <author-notes>
                    <fn fn-type="conflict">
                        <p>
                            <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                    </fn>
                </author-notes>
                <pub-date pub-type="epub">
                    <day>29</day>
                    <month>9</month>
                    <year>2023</year>
                </pub-date>
            </front-stub>
            <body>
                <p>
                    <bold>Comment 1:</bold>&#x00a0;The author mentioned "accident" in the paper. "Accident" is a broader term that implies an unintentional or unforeseen incident. It can encompass various events, including crashes, collisions, or other incidents resulting from human error, mechanical failure, or environmental factors. "Crash" is a more specific term that typically refers to a collision or impact between vehicles, objects, or individuals. It often implies a sudden and forceful event.</p>
                <p> Ultimately, the choice between "accident" and "crash" should be based on the specific details and nature of the event being described in the paper; on that basis, "accident" should be replaced by "crash".</p>
                <p> </p>
                <p> 
                    <bold>RESPONSE:&#x00a0;</bold>Thank you for your insightful comments and for bringing up the concern regarding the usage of the term "accident" in our paper. Your observation about the broad implication of "accident" and the specificity of "crash" is indeed valid.</p>
                <p> </p>
                <p> I would like to clarify that in the context of our paper, the mention of "accidents" is based on Indian data, which is commonly referred to as road traffic accident data in official and legal documents in India. The term "accident" here encompasses various events, including crashes, collisions, or other incidents resulting from human error, mechanical failure, or environmental factors, as per the definition used in the Indian context.</p>
                <p> </p>
                <p> All the parameters mentioned in the paper have been taken into consideration, and the terminology used is consistent with the data source and regional context. We appreciate the suggestion to use the term "crash" for more specificity and will certainly consider this in our future research and writings, ensuring that the terminology aligns with the global standards and is clear to all readers.</p>
                <p> </p>
                <p> 
                    <bold>Comment 2:</bold>&#x00a0;The author should add a random forest formula in the methodology.</p>
                <p> </p>
                <p> 
                    <bold>RESPONSE:&#x00a0;</bold>The random forest formula is added to the methods section as below:</p>
                <p> Random Forest Algorithm Formulation: The Random Forest algorithm can be represented as:</p>
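                <p>
                    <disp-formula>
                        <tex-math>RF(X) = \frac{1}{B} \sum_{b=1}^{B} T_b(X)</tex-math>
                    </disp-formula>
                </p>
                <p> where <italic>X</italic> denotes the input features, <italic>B</italic> is the number of trees, and <italic>T<sub>b</sub></italic>(<italic>X</italic>) is the prediction of the <italic>b</italic>-th individual decision tree.</p>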
                <p> </p>
                <p> 
                    <bold>Comment 3:</bold>&#x00a0;It might be better if there is a Gini impurity test in the paper to understand the importance of explanatory variables.</p>
                <p> </p>
                <p> 
                    <bold>RESPONSE:&#x00a0;</bold>The Gini impurity analysis is added to the methods section as below:</p>
                <p> &#x00a0;In the construction of the Random Forest model for predicting accident severity, Gini impurity is employed as a criterion to evaluate the significance of different explanatory variables. Gini impurity, a measure utilized within the framework of decision trees (the base learners in a Random Forest), is crucial for the optimal selection of features at each node split. It offers a quantitative metric to discern the effectiveness of a variable in segregating the target classes.</p>
                <p> Mechanism of Gini Impurity: In the context of binary classification, the Gini impurity for a node is calculated as:</p>
                <p>
                    <disp-formula>
                        <tex-math>I_G(p) = 1 - \sum_{k=1}^{n} p_k^{2}</tex-math>
                    </disp-formula>
                </p>
                <p> where 
                    <italic>p<sub>k</sub></italic> is the proportion of samples classified to class 
                    <italic>k</italic> at that node, and the summation operates over all <italic>n</italic> classes. A lower Gini impurity score suggests a higher purity of the node, indicating an enhanced classification.</p>
                <p> </p>
                <p> Gini Importance in Random Forest: In the developed Random Forest model, the Gini impurity plays a dual role: (1) 
                    <bold>Node Splitting</bold>: it aids in the identification of the most significant variable at each node by evaluating the potential reduction in impurity for each split; and (2) 
                    <bold>Feature Importance</bold>: after model training, the average decrease in impurity caused by each feature across all trees is computed, known as Gini importance. This metric offers insights into the relative significance of different features for the prediction task.</p>
            </body>
        </sub-article>
    </sub-article>
</article>
