<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.2 20190208//EN" "http://jats.nlm.nih.gov/publishing/1.2/JATS-journalpublishing1.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="1.2" xml:lang="en">
    <front>
        <journal-meta>
            <journal-id journal-id-type="pmc">F1000Research</journal-id>
            <journal-title-group>
                <journal-title>F1000Research</journal-title>
            </journal-title-group>
            <issn pub-type="epub">2046-1402</issn>
            <publisher>
                <publisher-name>F1000 Research Limited</publisher-name>
                <publisher-loc>London, UK</publisher-loc>
            </publisher>
        </journal-meta>
        <article-meta>
            <article-id pub-id-type="doi">10.12688/f1000research.17555.1</article-id>
            <article-categories>
                <subj-group subj-group-type="heading">
                    <subject>Research Article</subject>
                </subj-group>
                <subj-group>
                    <subject>Articles</subject>
                </subj-group>
            </article-categories>
            <title-group>
                <article-title>An interpretable machine learning model of biological age</article-title>
                <fn-group content-type="pub-status">
                    <fn>
                        <p>[version 1; peer review: 2 approved with reservations]</p>
                    </fn>
                </fn-group>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author" corresp="yes">
                    <name>
                        <surname>Wood</surname>
                        <given-names>Thomas R.</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Conceptualization</role>
                    <role content-type="http://credit.niso.org/">Data Curation</role>
                    <role content-type="http://credit.niso.org/">Formal Analysis</role>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <role content-type="http://credit.niso.org/">Supervision</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Original Draft Preparation</role>
                    <uri content-type="orcid">https://orcid.org/0000-0002-1130-2720</uri>
                    <xref ref-type="corresp" rid="c1">a</xref>
                    <xref ref-type="aff" rid="a1">1</xref>
                    <xref ref-type="aff" rid="a2">2</xref>
                    <xref ref-type="aff" rid="a3">3</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Kelly</surname>
                        <given-names>Christopher</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Conceptualization</role>
                    <role content-type="http://credit.niso.org/">Data Curation</role>
                    <role content-type="http://credit.niso.org/">Formal Analysis</role>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <role content-type="http://credit.niso.org/">Software</role>
                    <role content-type="http://credit.niso.org/">Visualization</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a2">2</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Roberts</surname>
                        <given-names>Megan</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Conceptualization</role>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a2">2</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Walsh</surname>
                        <given-names>Bryan</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Conceptualization</role>
                    <role content-type="http://credit.niso.org/">Formal Analysis</role>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a4">4</xref>
                </contrib>
                <aff id="a1">
                    <label>1</label>Department of Pediatrics, University of Washington, Seattle, Washington, 98195, USA</aff>
                <aff id="a2">
                    <label>2</label>Nourish Balance Thrive, Redding, California, USA</aff>
                <aff id="a3">
                    <label>3</label>Institute for Human and Machine Cognition, Pensacola, Florida, USA</aff>
                <aff id="a4">
                    <label>4</label>University of Western States, Portland, Oregon, USA</aff>
            </contrib-group>
            <author-notes>
                <corresp id="c1">
                    <label>a</label>
                    <email xlink:href="mailto:tommyrw@uw.edu">tommyrw@uw.edu</email>
                </corresp>
                <fn fn-type="conflict">
                    <p>
                        <bold>Competing interests: </bold>The authors are all co-founders of an online commercial tool, bloodcalculator.com, developed to assist in the analysis of blood test results. The predicted age algorithm described in the manuscript is online and freely-available through this tool.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>4</day>
                <month>1</month>
                <year>2019</year>
            </pub-date>
            <pub-date pub-type="collection">
                <year>2019</year>
            </pub-date>
            <volume>8</volume>
            <elocation-id>17</elocation-id>
            <history>
                <date date-type="accepted">
                    <day>21</day>
                    <month>12</month>
                    <year>2018</year>
                </date>
            </history>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2019 Wood TR et al.</copyright-statement>
                <copyright-year>2019</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access article distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <self-uri content-type="pdf" xlink:href="https://f1000research.com/articles/8-17/pdf"/>
            <abstract>
                <p>
                    <bold>Background:</bold> Assessments of biological (rather than chronological) age derived from patient biochemical data have been shown to strongly predict both all-cause and disease-specific mortality. However, these population-based approaches have yet to be translated to the individual. As well as using biological age as a research tool, by being able to better answer the question &#x201c;why did we get this result?&#x201d;, clinicians may be able to apply personalised interventions that could improve the long-term health of individual patients.</p>
                <p>
                    <bold>Methods:</bold> Here, the boosted decision tree algorithm XGBoost was used to predict biological age using 39 commonly-available blood test results from the US National Health and Nutrition Examination Survey (NHANES) database.</p>
                <p>
                    <bold>Results:</bold> Interrogation of the algorithm produced a description of how each marker contributed to the final output in a single individual. Additive explanation plots were then used to determine biomarker ranges associated with a lower biological age. Importantly, a number of markers that are modifiable with lifestyle changes were found to have a significant effect on biological age, including fasting blood glucose, lipids, and markers of red blood cell production.</p>
                <p>
                    <bold>Conclusions:</bold> The combination of individualised outputs with target ranges could provide the ability to personalise interventions or recommendations based on an individual&#x2019;s biochemistry and resulting predicted age. This would allow for the investigation of interventions designed to improve health and longevity in a targeted manner, many of which could be rooted in targeted lifestyle modifications.</p>
            </abstract>
            <kwd-group kwd-group-type="author">
                <kwd>Aging</kwd>
                <kwd>Machine Learning</kwd>
                <kwd>Age</kwd>
            </kwd-group>
            <funding-group>
                <funding-statement>The author(s) declared that no grants were involved in supporting this work.</funding-statement>
            </funding-group>
        </article-meta>
    </front>
    <body>
        <sec sec-type="intro">
            <title>Introduction</title>
            <p>One of the fastest-growing areas at the intersection of clinical medicine and data science is the investigation of human aging
                <sup>
                    <xref ref-type="bibr" rid="ref-1">1</xref>
                </sup>, with multiple avenues being explored to find biomarkers of aging that could be used to inform efforts to enhance human longevity
                <sup>
                    <xref ref-type="bibr" rid="ref-2">2</xref>&#x2013;
                    <xref ref-type="bibr" rid="ref-4">4</xref>
                </sup>. If robust and easily-accessible biomarkers of aging are identified, they could assist in the rapid assessment of promising interventions aimed at increasing longevity, without the need to perform clinical trials that last decades. For instance, epigenetic modifications on DNA are increasingly being used to determine biological (rather than chronological) age, including how environmental determinants may affect an epigenetic signal for longevity
                <sup>
                    <xref ref-type="bibr" rid="ref-4">4</xref>
                </sup>.
</p>
            <p>An individual&#x2019;s biological age can be described based on the assumption that cellular aging processes, which are highly influenced by the environment
                <sup>
                    <xref ref-type="bibr" rid="ref-5">5</xref>
                </sup>, occur at different rates in different people with the same chronological age. As these aging processes are associated with changes in routine biochemical measures
                <sup>
                    <xref ref-type="bibr" rid="ref-6">6</xref>
                </sup>, algorithmic determination of biological or phenotypic age using widely-available indices such as those from blood test results is therefore becoming increasingly common. This has previously been done using both machine learning (ML) and statistical techniques
                <sup>
                    <xref ref-type="bibr" rid="ref-3">3</xref>,
                    <xref ref-type="bibr" rid="ref-6">6</xref>
                </sup>.
</p>
            <p>One important aspect for the utility of biological age measures is that a given output can be interpreted in order to guide individualized interventions. ML-based predictions of biological age have the potential to elucidate and describe complex, non-linear, and unintuitive patterns in biochemical data, which may provide greater predictive power compared to other statistical techniques. To date, published approaches to generate predicted biological age from biochemical data have used deep neural networks (DNNs), with the output being directly associated with mortality risk
                <sup>
                    <xref ref-type="bibr" rid="ref-3">3</xref>
                </sup>. However, while individual outputs from DNNs are interpretable
                <sup>
                    <xref ref-type="bibr" rid="ref-7">7</xref>
                </sup>, it is currently not possible to interrogate the effects of the entire training dataset on the model output, which may be important for determining how one may intervene given an individual&#x2019;s output.</p>
            <p>As a result of the issues with interpreting certain ML algorithms, the field of explainable artificial intelligence is developing rapidly
                <sup>
                    <xref ref-type="bibr" rid="ref-8">8</xref>
                </sup>. If such approaches can be successfully applied to determining biological age from commonly available data, biological signatures of aging could be more rapidly discovered and tracked, including the ability to personalise interventions based on the outputs of the model. Here, we describe the development of an explainable ML model using blood marker data from the National Health and Nutrition Examination Survey (NHANES) database to predict biological age, as well as provide individual weighting for how each biomarker affected the final output. By determining how markers affect the model globally, potential target reference ranges associated with lower biological age can also be determined.</p>
        </sec>
        <sec sec-type="methods">
            <title>Methods</title>
            <sec>
                <title>Input data</title>
                <p>Data from a total of 46,739 participants (n=22,545 males and n=24,194 females) in the NHANES database were included, with a mean (range) age of 48.5 (19.0&#x2013;85.0) years. A total of 39 common blood markers were used: complete blood count (CBC) with differential, lipids, fasting glucose, iron panel, and a comprehensive metabolic panel (including electrolytes, and liver and kidney function). Descriptive data for the dataset is listed in 
                    <xref ref-type="table" rid="T1">Table 1</xref>.</p>
                <table-wrap id="T1" orientation="portrait" position="anchor">
                    <label>Table 1. </label>
                    <caption>
                        <title>Demographic data from the entire NHANES dataset.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="2" valign="middle">Variable</th>
                                <th align="center" colspan="2" rowspan="1" valign="top">Females
                                    <break/>(n=24,194)</th>
                                <th align="center" colspan="2" rowspan="1" valign="top">Males
                                    <break/>(n=22,545)</th>
                            </tr>
                            <tr>
                                <th align="center" colspan="1" rowspan="1" valign="top">Mean</th>
                                <th align="center" colspan="1" rowspan="1" valign="top">SD</th>
                                <th align="center" colspan="1" rowspan="1" valign="top">Mean</th>
                                <th align="center" colspan="1" rowspan="1" valign="top">SD</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <italic toggle="yes">Chronological Age (years)</italic>
                                </td>
                                <td align="center" colspan="1" rowspan="1" valign="top">48.1</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">18.8</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">48.8</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">18.8</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Red blood cells (&#x00d7;10
                                    <sup>6</sup>/&#x00b5;l)</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">4.4</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">0.4</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">4.9</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">0.5</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Red blood cell distribution width (%)</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">13.2</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">1.5</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">13.0</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">1.1</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Hematocrit (%)</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">39.2</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">3.6</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">44.2</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">3.8</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Hemoglobin (g/dl)</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">13.3</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">1.3</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">15.0</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">1.3</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Mean corpuscular hemoglobin (pg)</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">30.1</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">2.5</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">30.6</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">2.2</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Mean corpuscular hemoglobin concentration (g/dl)</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">33.9</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">1.0</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">34.0</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">1.0</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Mean corpuscular volume (fl)</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">88.9</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">6.1</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">90.1</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">5.3</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Platelets (&#x00d7;10
                                    <sup>3</sup>/&#x00b5;l)</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">267.9</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">70.7</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">238.0</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">61.1</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Mean platelet volume (fl)</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">8.2</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">0.9</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">8.2</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">0.9</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Neutrophils (&#x00d7;10
                                    <sup>3</sup>/&#x00b5;l)</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">4.4</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">1.8</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">4.2</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">1.8</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Lymphocytes (&#x00d7;10
                                    <sup>3</sup>/&#x00b5;l)</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">2.2</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">1.0</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">2.1</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">1.5</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Monocytes (&#x00d7;10
                                    <sup>3</sup>/&#x00b5;l)</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">0.5</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">0.2</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">0.6</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">0.2</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Eosinophils (&#x00d7;10
                                    <sup>3</sup>/&#x00b5;l)</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">0.2</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">0.2</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">0.2</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">0.2</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Basophils (&#x00d7;10
                                    <sup>3</sup>/&#x00b5;l)</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">0.0</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">0.1</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">0.0</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">0.1</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Total Cholesterol (mg/dl)</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">199.0</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">42.5</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">192.4</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">42.9</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Low-density lipoprotein cholesterol (mg/dl)</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">115.7</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">35.8</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">115.7</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">36.1</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">High-density lipoprotein cholesterol (mg/dl)</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">57.5</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">16.4</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">47.8</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">14.2</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Triglycerides (mg/dl)</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">129.0</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">101.6</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">146.9</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">136.6</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Glucose (mg/dl)</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">98.8</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">37.1</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">103.4</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">39.5</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Iron (&#x00b5;g/dl)</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">77.3</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">34.4</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">92.9</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">35.8</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Total Iron Binding Capacity (&#x00b5;g/dl)</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">380.9</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">71.7</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">351.9</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">55.0</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Ferritin (ng/ml)</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">78.1</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">103.6</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">183.5</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">180.3</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Sodium (mmol/l)</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">138.9</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">2.4</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">139.3</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">2.3</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Potassium (mmol/l)</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">3.9</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">0.3</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">4.1</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">0.3</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Chloride (mmol/l)</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">103.8</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">3.0</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">103.3</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">2.9</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Carbon Dioxide (mmol/l)</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">24.3</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">2.4</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">25.1</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">2.2</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Calcium (mg/dl)</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">9.4</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">0.4</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">9.5</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">0.4</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Phosphorus (mg/dl)</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">3.8</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">0.5</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">3.7</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">0.6</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Creatinine (mg/dl)</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">0.8</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">0.4</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">1.0</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">0.5</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Urea nitrogen (mg/dl)</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">12.6</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">6.0</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">14.4</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">6.1</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Albumin (g/dl)</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">4.1</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">0.4</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">4.4</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">0.3</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Globulins (g/dl)</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">3.0</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">0.5</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">2.9</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">0.5</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Alanine transaminase (IU/l)</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">21.5</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">20.5</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">29.5</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">27.8</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Aspartate transaminase (IU/l)</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">23.6</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">15.1</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">27.8</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">22.2</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Alkaline Phosphatase (IU/l)</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">71.1</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">27.8</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">71.8</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">26.6</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Bilirubin (mg/dl)</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">0.6</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">0.3</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">0.8</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">0.3</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Gamma glutamyl-transferase (IU/l)</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">24.3</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">35.7</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">34.8</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">52.1</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Lactate dehydrogenase (IU/l)</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">131.6</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">29.7</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">133.1</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">35.8</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Uric Acid (mg/dl)</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">4.8</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">1.3</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">6.1</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">1.3</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
            </sec>
            <sec>
                <title>Model generation</title>
                <p> NHANES data (all available individuals with the 39 markers listed in 
                    <xref ref-type="table" rid="T1">Table 1</xref> from years 1999&#x2013;2015) was downloaded as .xpt files from the NHANES website using their in-built web search engine. The data was then concatenated, cross-tabulated, and stratified by gender. A random split in the dataset was created to withhold 20% of participants (n=4,509 males and n=4,839 females) for model validation. The remaining 80% of the dataset was used to train an XGBRegressor model (XGBoost version 0.81) using chronological age and the 39 biochemical input markers. For the withheld 20% of the data, the 39 markers were provided to the algorithm
                    <sup>
                        <xref ref-type="bibr" rid="ref-9">9</xref>
                    </sup> with the chronological age withheld, and the resulting dependent variable &#x201c;predicted age&#x201d; defined as a measure of biological age. Age predictions for the withheld data were plotted against actual age using 
                    <ext-link ext-link-type="uri" xlink:href="https://seaborn.pydata.org/generated/seaborn.jointplot.html">jointplot</ext-link> from the 
                    <ext-link ext-link-type="uri" xlink:href="https://seaborn.pydata.org/installing.html">seaborn</ext-link> Python library (version 0.9.0).</p>
            </sec>
            <sec>
                <title>Model interrogation</title>
                <p>For individual predictions, the weight of each marker was extracted using 
                    <ext-link ext-link-type="uri" xlink:href="https://eli5.readthedocs.io/en/latest/libraries/xgboost.html">ELI5</ext-link> (version 0.8.1), and graphed using a 
                    <ext-link ext-link-type="uri" xlink:href="https://github.com/chrispaulca/waterfall">waterfall</ext-link> chart (version 3.8). For a given age prediction, each marker was individually weighted with regard to how it contributed to the final output. 
                    <ext-link ext-link-type="uri" xlink:href="https://github.com/slundberg/shap">Shapley additive explanations</ext-link> plots (SHAP, version 0.26.0) were constructed to describe how each individual marker affects the predicted age output within the laboratory normal range.</p>
            </sec>
            <sec>
                <title>Worked example</title>
                <p>To provide an individual output example based on data not seen by the algorithm
                    <sup>
                        <xref ref-type="bibr" rid="ref-9">9</xref>
                    </sup> previously, author C.K. had the necessary input markers measured by Quest Laboratories (Santa Cruz, CA). As C.K. is an author who ran his own data through the algorithm
                    <sup>
                        <xref ref-type="bibr" rid="ref-9">9</xref>
                    </sup> he trained during development of the manuscript, institutional ethical approval was not sought for publication of this data. C.K. approved the publication of his data in this manner.</p>
            </sec>
        </sec>
        <sec sec-type="results">
            <title>Results</title>
            <sec>
                <title>Differences between predicted (biological) age and chronological age</title>
                <p>Linear regression analysis (
                    <xref ref-type="fig" rid="f1">Figure 1</xref>) showed a significant correlation between predicted (biological) and actual (chronological) age (r=0.77 and 0.75 in females and males, respectively; p&lt;0.0001 for both). However, discrepancies between the biological and chronological age could be considered clinically relevant, as they would allow for the generation of a signature of premature biological aging.</p>
                <fig fig-type="figure" id="f1" orientation="portrait" position="float">
                    <label>Figure 1. </label>
                    <caption>
                        <title>Linear regression analysis comparing actual (chronological) and predicted (biological) age.</title>
                        <p>Data shown for women (
                            <bold>A</bold>) and men (
                            <bold>B</bold>) using the 20% withheld data (n=4,509 males and n=4,839 females). A significant correlation between predicted and actual age (r=0.77 and 0.75 in females and males, respectively) was seen in both sexes (p&lt;0.0001).</p>
                    </caption>
                    <graphic orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/19198/6907fdc4-e604-4a5c-aab6-288fc8b45cf1_Figure1.gif"/>
                </fig>
            </sec>
            <sec>
                <title>SHAP plots of input markers</title>
                <p>SHAP summary plots (
                    <xref ref-type="fig" rid="f2">Figure 2</xref>) were used to determine which markers have the greatest influence on predicted biological age. The top 20 markers in terms of importance are shown. In females, blood urea nitrogen (BUN) had the greatest influence on biological age, with albumin the most influential marker in men. Fasting glucose was the second most influential marker in both sexes (
                    <xref ref-type="fig" rid="f2">Figure 2</xref>). SHAP plots for each of the 20 most influential markers are available on 
                    <ext-link ext-link-type="uri" xlink:href="https://github.com/cck197/ml-bio-age/wiki/SHAP-plots-showing-the-adjustment-to-predicted-age">GitHub</ext-link> and Zenodo
                    <sup>
                        <xref ref-type="bibr" rid="ref-9">9</xref>
                    </sup>. Based on each of these 20 markers, the level at which an inflection point was seen in the SHAP plot (i.e. when a further change in a marker would result in a net increase in predicted biological age) was determined, as well as the estimated range over which each marker would be associated with the lowest biological age (
                    <xref ref-type="table" rid="T2">Table 2</xref> and 
                    <xref ref-type="table" rid="T3">Table 3</xref>). Using the five most influential markers as an example, the lowest predicted age in women would be associated with a BUN 6&#x2013;11 mg/dl, fasting glucose 71&#x2013;86 mg/dl, bicarbonate (carbon dioxide) 19&#x2013;22 mmol/l, total cholesterol 130&#x2013;150 mg/dl, and mean corpuscular volume (MCV) 80&#x2013;85 fl. In men, the lowest predicted age would be associated with albumin 4.6&#x2013;4.8 g/dl, fasting glucose 70&#x2013;88 mg/dl, BUN 6&#x2013;12 mg/dl, red blood cell (RBC) 5.0&#x2013;5.7 &#x00d7;10
                    <sup>3</sup>/&#x00b5;l, and RBC distribution width (RDW) 11.0&#x2013;12.5%.</p>
                <fig fig-type="figure" id="f2" orientation="portrait" position="float">
                    <label>Figure 2. </label>
                    <caption>
                        <title>SHAP summary plots showing the adjustment to predicted age (x-axis) for each of the top 20 markers.</title>
                        <p>Data shown for women (
                            <bold>A</bold>) and men (
                            <bold>B</bold>). Each plot is made up of thousands of individual points from the training dataset, with a higher value being more red and a lower value being more blue. This is depicted by the &#x201c;feature value&#x201d; bar on the right of each plot. Therefore, if the dots on one side of the central line are increasingly red or blue, that suggests that increasing values or decreasing values, respectively, move the predicted age in that direction. For instance, lower BUN values (blue dots) are associated with lower predicted age in both men and women.</p>
                    </caption>
                    <graphic orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/19198/6907fdc4-e604-4a5c-aab6-288fc8b45cf1_Figure2.gif"/>
                </fig>
                <table-wrap id="T2" orientation="portrait" position="anchor">
                    <label>Table 2. </label>
                    <caption>
                        <title>Top 20 markers affecting predicted age in women.</title>
                        <p>Ranking of markers affecting predicted age in women, in order of importance, as determined by the SHAP summary outputs. Visual examination of the individual SHAP plots for each marker was used to estimate the range over which each marker would result in the lowest predicted age, and the magnitude of the adjustment in years. The final column is the value at which a marker changes from a net negative to net positive effect on biological age.</p>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="center" colspan="1" rowspan="1">Marker Rank</th>
                                <th align="left" colspan="1" rowspan="1">Marker</th>
                                <th align="center" colspan="1" rowspan="1">Estimated range for
                                    <break/>lowest predicted age</th>
                                <th align="center" colspan="1" rowspan="1">Magnitude of
                                    <break/>effect (years)</th>
                                <th align="center" colspan="1" rowspan="1">Inflection
                                    <break/>point</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="center" colspan="1" rowspan="1">1</td>
                                <td align="left" colspan="1" rowspan="1">BUN</td>
                                <td align="center" colspan="1" rowspan="1">6&#x2013;11 mg/dl</td>
                                <td align="center" colspan="1" rowspan="1">-9 to -2</td>
                                <td align="center" colspan="1" rowspan="1">12 mg/dl</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1">2</td>
                                <td align="left" colspan="1" rowspan="1">Glucose</td>
                                <td align="center" colspan="1" rowspan="1">71&#x2013;86 mg/dl</td>
                                <td align="center" colspan="1" rowspan="1">-7.5 to -3</td>
                                <td align="center" colspan="1" rowspan="1">86 mg/dl</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1">3</td>
                                <td align="left" colspan="1" rowspan="1">Carbon Dioxide</td>
                                <td align="center" colspan="1" rowspan="1">18&#x2013;22 mmol/l</td>
                                <td align="center" colspan="1" rowspan="1">-6 to -2</td>
                                <td align="center" colspan="1" rowspan="1">25 mmol/l</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1">4</td>
                                <td align="left" colspan="1" rowspan="1">Total Cholesterol</td>
                                <td align="center" colspan="1" rowspan="1">130&#x2013;150 mg/dl</td>
                                <td align="center" colspan="1" rowspan="1">-5 to -1.5</td>
                                <td align="center" colspan="1" rowspan="1">195 mg/dl</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1">5</td>
                                <td align="left" colspan="1" rowspan="1">MCV</td>
                                <td align="center" colspan="1" rowspan="1">80&#x2013;85 fl</td>
                                <td align="center" colspan="1" rowspan="1">-3.5 to -1.0</td>
                                <td align="center" colspan="1" rowspan="1">90 fl</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1">6</td>
                                <td align="left" colspan="1" rowspan="1">LDH</td>
                                <td align="center" colspan="1" rowspan="1">120&#x2013;130 IU</td>
                                <td align="center" colspan="1" rowspan="1">-1 to 0</td>
                                <td align="center" colspan="1" rowspan="1">130 IU</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1">7</td>
                                <td align="left" colspan="1" rowspan="1">Creatinine</td>
                                <td align="center" colspan="1" rowspan="1">0.62&#x2013;0.78 mg/dl</td>
                                <td align="center" colspan="1" rowspan="1">-3 to 0</td>
                                <td align="center" colspan="1" rowspan="1">0.82 mg/dl</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1">8</td>
                                <td align="left" colspan="1" rowspan="1">RDW</td>
                                <td align="center" colspan="1" rowspan="1">10&#x2013;12 %</td>
                                <td align="center" colspan="1" rowspan="1">-4 to -0.5</td>
                                <td align="center" colspan="1" rowspan="1">0</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1">9</td>
                                <td align="left" colspan="1" rowspan="1">Lymphocytes</td>
                                <td align="center" colspan="1" rowspan="1">2.3&#x2013;3.0 &#x00d7;10E3/&#x00b5;l</td>
                                <td align="center" colspan="1" rowspan="1">-1.2 to 0</td>
                                <td align="center" colspan="1" rowspan="1">1.9 &#x00d7;10E3/&#x00b5;l</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1">10</td>
                                <td align="left" colspan="1" rowspan="1">Sodium</td>
                                <td align="center" colspan="1" rowspan="1">137&#x2013;139 mmol/l</td>
                                <td align="center" colspan="1" rowspan="1">-1.2 to 0</td>
                                <td align="center" colspan="1" rowspan="1">140 mmol/l</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1">11</td>
                                <td align="left" colspan="1" rowspan="1">AST</td>
                                <td align="center" colspan="1" rowspan="1">13&#x2013;17 IU</td>
                                <td align="center" colspan="1" rowspan="1">-2 to -0.5</td>
                                <td align="center" colspan="1" rowspan="1">22 IU</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1">12</td>
                                <td align="left" colspan="1" rowspan="1">Chloride</td>
                                <td align="center" colspan="1" rowspan="1">103&#x2013;106 mmol/l</td>
                                <td align="center" colspan="1" rowspan="1">-1.5 to 0</td>
                                <td align="center" colspan="1" rowspan="1">103 mmol/l</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1">13</td>
                                <td align="left" colspan="1" rowspan="1">GGT</td>
                                <td align="center" colspan="1" rowspan="1">5&#x2013;10 IU</td>
                                <td align="center" colspan="1" rowspan="1">-3 to -0.5</td>
                                <td align="center" colspan="1" rowspan="1">15 IU</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1">14</td>
                                <td align="left" colspan="1" rowspan="1">ALT</td>
                                <td align="center" colspan="1" rowspan="1">42&#x2013;44 IU</td>
                                <td align="center" colspan="1" rowspan="1">-4 to -1</td>
                                <td align="center" colspan="1" rowspan="1">21 IU</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1">15</td>
                                <td align="left" colspan="1" rowspan="1">ALP</td>
                                <td align="center" colspan="1" rowspan="1">40&#x2013;56 IU</td>
                                <td align="center" colspan="1" rowspan="1">-2 to -0.2</td>
                                <td align="center" colspan="1" rowspan="1">65 IU</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1">16</td>
                                <td align="left" colspan="1" rowspan="1">Albumin</td>
                                <td align="center" colspan="1" rowspan="1">4.6&#x2013;4.8 g/dl</td>
                                <td align="center" colspan="1" rowspan="1">-4 to -0.5</td>
                                <td align="center" colspan="1" rowspan="1">4.3 g/dl</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1">17</td>
                                <td align="left" colspan="1" rowspan="1">Neutrophils</td>
                                <td align="center" colspan="1" rowspan="1">6.6&#x2013;7 &#x00d7;10E3/&#x00b5;l</td>
                                <td align="center" colspan="1" rowspan="1">-2.5 to -0.5</td>
                                <td align="center" colspan="1" rowspan="1">5 &#x00d7;10E3/&#x00b5;l</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1">18</td>
                                <td align="left" colspan="1" rowspan="1">Ferritin</td>
                                <td align="center" colspan="1" rowspan="1">30&#x2013;50 ng/ml</td>
                                <td align="center" colspan="1" rowspan="1">-3 to 0</td>
                                <td align="center" colspan="1" rowspan="1">50 ng/ml</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1">19</td>
                                <td align="left" colspan="1" rowspan="1">Phosphorus</td>
                                <td align="center" colspan="1" rowspan="1">5.2&#x2013;7.1 mg/dl</td>
                                <td align="center" colspan="1" rowspan="1">-4.8 to -1</td>
                                <td align="center" colspan="1" rowspan="1">4.1 mg/dl</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1">20</td>
                                <td align="left" colspan="1" rowspan="1">Potassium</td>
                                <td align="center" colspan="1" rowspan="1">3.5&#x2013;3.9 mmol/l</td>
                                <td align="center" colspan="1" rowspan="1">-1 to 0</td>
                                <td align="center" colspan="1" rowspan="1">4.1 mmol/l</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
                <table-wrap id="T3" orientation="portrait" position="anchor">
                    <label>Table 3. </label>
                    <caption>
                        <title>Top 20 markers affecting predicted age in men.</title>
                        <p>Ranking of markers affecting predicted age in men, in order of importance, as determined by the SHAP summary outputs. Visual examination of the individual SHAP plots for each marker was used to estimate the range over which each marker would result in the lowest predicted age, and the magnitude of the adjustment in years. The final column is the value at which a marker changes from a net negative to net positive effect on biological age.</p>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="center" colspan="1" rowspan="1" valign="top">Marker Rank</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Marker</th>
                                <th align="center" colspan="1" rowspan="1" valign="top">Estimated range for
                                    <break/>lowest predicted age</th>
                                <th align="center" colspan="1" rowspan="1" valign="top">Magnitude of
                                    <break/>effect (years)</th>
                                <th align="center" colspan="1" rowspan="1" valign="top">Inflection
                                    <break/>point</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">1</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Albumin</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">4.6&#x2013;4.8 g/dl</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">-10 to -1</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">4.4 g/dl</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">2</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Glucose</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">70&#x2013;88 mg/dl</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">-7.0 to -1</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">96 mg/dl</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">3</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">BUN</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">6&#x2013;12 mg/dl</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">-7.5 to -1</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">14 mg/dl</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">4</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">RBC</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">5.0&#x2013;5.7 &#x00d7;10E3/&#x00b5;l</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">-4.0 to -0.5</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">4.8 &#x00d7;10E3/&#x00b5;l</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">5</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">RDW</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">11.0&#x2013;12.5 %</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">-7.5 to -1</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">13%</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">6</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">MCV</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">79&#x2013;87 fl</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">-4.0 to -1.5</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">90 fl</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">7</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">ALT</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">33&#x2013;45 IU</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">-4.5 to -0.5</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">28 IU</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">8</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Phosphorus</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">4.1&#x2013;4.5 mg/dl</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">-4.0 to -0.5</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">3.8 mg/dl</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">9</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Lymphocytes</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">1.9&#x2013;3.0 &#x00d7;10E3/&#x00b5;l</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">-2.0 to -0.5</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">1.8 &#x00d7;10E3/&#x00b5;l</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">10</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Total Cholesterol</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">100&#x2013;160 mg/dl</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">-5.5 to -0.5</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">190 mg/dl</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">11</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Platelets</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">250&#x2013;400 &#x00d7;10E3/&#x00b5;l</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">-3.0 to -0.2</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">210 &#x00d7;10E3/&#x00b5;l</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">12</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Potassium</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">3.5&#x2013;4.1 mmol/l</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">-1.8 to -0.2</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">4.2 mmol/l</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">13</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Creatinine</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">0.5&#x2013;1.0 mg/dl</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">-1.5 to 0</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">1.0 mg/dl</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">14</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">LDH</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">80&#x2013;120 IU</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">-3 to -0.5</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">130 IU</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">15</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Triglycerides</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">40&#x2013;60 mg/dl</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">-6 to -2</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">100 mg/dl</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">16</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Monocytes</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">0.1&#x2013;0.5 &#x00d7;10E3/&#x00b5;l</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">-1.5 to 0</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">0.6 &#x00d7;10E3/&#x00b5;l</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">17</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Neutrophils</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">1.5&#x2013;2.8 &#x00d7;10E3/&#x00b5;l</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">-2.2 to -0.2</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">3.4 &#x00d7;10E3/&#x00b5;l</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">18</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">MCHC</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">34.3&#x2013;35.7 g/dl</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">-1 to 0</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">33.9 g/dl</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">19</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">GGT</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">6&#x2013;15 IU</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">-2.0 to 0</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">21 IU</td>
                            </tr>
                            <tr>
                                <td align="center" colspan="1" rowspan="1" valign="top">20</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Total Bilirubin</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">0.1&#x2013;0.6 mg/dl</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">-0.75 to 0</td>
                                <td align="center" colspan="1" rowspan="1" valign="top">0.8 mg/dl</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
            </sec>
            <sec>
                <title>Fully interpretable personalised predictions</title>
                <p>For a given individual, the model output allows for each marker to be individually weighted with regard to how it contributed to the final output (
                    <xref ref-type="fig" rid="f3">Figure 3</xref>). The average age in the training dataset (BIAS) is given as a starting point, with each marker subsequently increasing or decreasing predicted age by a number of years. This allows for the most influential markers for the individual to be determined. The example shown is for one of the study authors (C.K.), the data for whom is available on Zenodo
                    <sup>
                        <xref ref-type="bibr" rid="ref-9">9</xref>
                    </sup>. Bias (48.3 years) is sequentially adjusted by each marker. The five markers contributing most to an increase in biological age were BUN (+3.5 years), total cholesterol (+2.8 years), potassium (+1.7 years), phosphorus (+1.2 years), and LDH (+0.9 years). The five markers contributing most to a decrease in biological age were lymphocytes (-1.2 years), RBCs (-2.3 years), albumin (-2.7 years), fasting glucose (-3.1 years), and triglycerides (-3.9 years). The final predicted biological age was 43.0 years.</p>
                <fig fig-type="figure" id="f3" orientation="portrait" position="float">
                    <label>Figure 3. </label>
                    <caption>
                        <title>Waterfall chart depicting how individual input markers contribute to a given predicted biological age (y-axis) for author C.K.</title>
                        <p>Bias (first column, 48.3 years) is the mean age in the input population. The five markers contributing most to an increase in biological age (columns 2&#x2013;6 from the left) were BUN, total cholesterol, potassium, phosphorus, and LDH. The five markers contributing most to a decrease in biological age (columns 2&#x2013;6 from the right) were lymphocytes, RBCs, albumin, glucose and triglycerides. The final predicted biological age (43.0 years) is in the last column.</p>
                    </caption>
                    <graphic orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/19198/6907fdc4-e604-4a5c-aab6-288fc8b45cf1_Figure3.gif"/>
                </fig>
            </sec>
        </sec>
        <sec sec-type="discussion">
            <title>Discussion</title>
            <p>Biomarkers of aging are increasingly important in the development and investigation of interventions with which to slow aging processes, which may also have the ability to aid in the treatment or prevention of aging-associated chronic disease. One such marker is the individual&#x2019;s biological or phenotypic age, as reflected by patterns of biochemical markers in the blood, which have previously been shown to be associated with risk of mortality
                <sup>
                    <xref ref-type="bibr" rid="ref-2">2</xref>,
                    <xref ref-type="bibr" rid="ref-3">3</xref>,
                    <xref ref-type="bibr" rid="ref-6">6</xref>
                </sup>. While there are a number of approaches to this problem in the published literature, we provide an alternative using a tree-based ML model that a) is fully interpretable, b) can be completely individualized for a given patient,  and c) allows the development of target ranges associated with a potential signature for slowed biological aging.</p>
            <p>One issue surrounding the utility of algorithmically-derived biological age is the response to any associated interventions or therapeutics. As this field is relatively new, it is uncertain how much an improvement in predicted biological age resulting from a given therapeutic approach will translate into improvements in longevity. Even if a given marker decreases predicted biological age, this also does not guarantee that manipulating the value will increase longevity. For instance, in our models, increasing ALT and decreasing total cholesterol were associated with lower predicted biological age; however, there are a number of scenarios where lower total cholesterol and higher ALT may be associated with increased mortality despite a lower predicted biological age
                <sup>
                    <xref ref-type="bibr" rid="ref-10">10</xref>,
                    <xref ref-type="bibr" rid="ref-11">11</xref>
                </sup>. Despite this, these models are at least able to generate hypotheses that can be tested in both the preclinical and clinical setting. Our approach also provides an example that other groups may use to produce fully-interpretable and personalisable outputs.</p>
            <p>Though the current analysis does not include confirmation of the ability to predict mortality risk, certain outputs from the algorithm
                <sup>
                    <xref ref-type="bibr" rid="ref-9">9</xref>
                </sup> do provide some confidence that the output is likely to be associated with individual health outcomes. For instance, the greatest increase in predicted age associated with fasting glucose level occurs in the range 90&#x2013;100 mg/dl, which is strikingly similar to the blood glucose level associated with the largest increase in mortality risk in multiple population studies
                <sup>
                    <xref ref-type="bibr" rid="ref-12">12</xref>,
                    <xref ref-type="bibr" rid="ref-13">13</xref>
                </sup>. Similar associations are seen with many of the target ranges derived from the algorithm
                <sup>
                    <xref ref-type="bibr" rid="ref-9">9</xref>
                </sup>, such as for albumin,  RDW, and ferritin (especially in men)
                <sup>
                    <xref ref-type="bibr" rid="ref-14">14</xref>&#x2013;
                    <xref ref-type="bibr" rid="ref-16">16</xref>
                </sup>.
</p>
            <p>If modulation of certain markers does indeed contribute to the reversal of cellular aging processes, the combination of an individual output with the population SHAP plots for a given marker could therefore allow for targeted therapeutic interventions aimed at improving biological age based on an individual&#x2019;s specific output. For instance, elevated fasting blood glucose could be decreased by addressing diet, exercise, micronutrient deficiencies, and reducing inflammation or psychosocial stress
                <sup>
                    <xref ref-type="bibr" rid="ref-17">17</xref>
                </sup>. Similar approaches are also likely to improve cholesterol, RDW, and MCV, confirming that lifestyle factors should play a key role in the pursuit of health and longevity
                <sup>
                    <xref ref-type="bibr" rid="ref-15">15</xref>,
                    <xref ref-type="bibr" rid="ref-18">18</xref>,
                    <xref ref-type="bibr" rid="ref-19">19</xref>
                </sup>. A personalised approach is important, because the markers contributing most strongly to biological age in the whole dataset are not necessarily the same markers that most strongly contribute to a prediction in a single individual (see example in 
                <xref ref-type="fig" rid="f3">Figure 3</xref>).</p>
            <p>The current approach does have some limitations. The dataset may only be applicable in the United States, as different countries and ethnic backgrounds might display variations in both baseline biochemistry and predicted longevity
                <sup>
                    <xref ref-type="bibr" rid="ref-3">3</xref>
                </sup>. Expanding available input data and allowing for stratification based on nationality and ethnic background will be the focus of future work. Larger and more expanded datasets will also allow for the analysis of biological aging in association with other potentially important factors such as genetics and the microbiota
                <sup>
                    <xref ref-type="bibr" rid="ref-20">20</xref>,
                    <xref ref-type="bibr" rid="ref-21">21</xref>
                </sup>. It is also worth mentioning that NHANES is designed to capture data that is representative of the US population. Therefore, this data comes from participants that represent a population that has some of the highest metabolic and cardiovascular disease prevalence in the Western world
                <sup>
                    <xref ref-type="bibr" rid="ref-22">22</xref>,
                    <xref ref-type="bibr" rid="ref-23">23</xref>
                </sup>, which may distort the results. Additionally, the current outputs would benefit from being correlated with disease outcomes or mortality in order to determine how well predicted biological age acts as an accurate biomarker of health and longevity.</p>
            <p>By using well-understood and robust biomarkers that are available to almost any clinician, methods such as those described in this study can be used immediately as adjuncts to research investigating the outcomes of interventions designed to increase human longevity. As multiple methods are currently available with which to predict biological or phenotypic age, the field should also collaborate in an attempt to compare methods such that we can find the approach that results in an accurate output that can most easily be used in both the research and clinical settings.</p>
        </sec>
        <sec>
            <title>Data availability</title>
            <p>All NHANES data used to produce the models is accessible through the CDC website (listed by NHANES study year): 
                <ext-link ext-link-type="uri" xlink:href="https://wwwn.cdc.gov/nchs/nhanes/search/default.aspx">https://wwwn.cdc.gov/nchs/nhanes/search/default.aspx</ext-link>.</p>
            <p>Data access, tabulation, and concatenation are automated by the &#x201c;01-download-preprocess&#x201d; Jupyter notebook file within our Zenodo repository; DOI: 
                <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.5281/zenodo.2440203">https://doi.org/10.5281/zenodo.2440203</ext-link>
                <sup>
                    <xref ref-type="bibr" rid="ref-9">9</xref>
                </sup>. This repository also includes the original Quest laboratory test results from author C.K., which were used to provide the worked example (
                <xref ref-type="fig" rid="f3">Figure 3</xref>).</p>
        </sec>
        <sec>
            <title>Software availability</title>
            <p>
                <bold>The algorithm developed here, including the associated libraries and the necessary versions, is available on Zenodo:</bold> 
                <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.5281/zenodo.2440203">https://doi.org/10.5281/zenodo.2440203</ext-link>
                <sup>
                    <xref ref-type="bibr" rid="ref-9">9</xref>
                </sup>.</p>
            <p>
                <bold>License:</bold> 
                <ext-link ext-link-type="uri" xlink:href="https://www.gnu.org/licenses/gpl-3.0.en.html">GNU General Public License version 3</ext-link>
            </p>
            <p>
                <bold>Notes:</bold> The algorithm itself can be trained and tested by running the &#x201c;02-train-test-explain&#x201d; Jupyter notebook. Note that each time the algorithm runs, a new random split in the dataset is generated in order to train and test the algorithm. Therefore, the resulting outputs might be slightly different.</p>
        </sec>
    </body>
    <back>
        <ref-list>
            <ref id="ref-1">
                <label>1</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Pyrkov</surname>
                            <given-names>TV</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Slipensky</surname>
                            <given-names>K</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Barg</surname>
                            <given-names>M</given-names>
                        </name>
						
                        <etal/>
					</person-group>:
                    <article-title>Extracting biological age from biomedical data via deep learning: too much of a good thing?</article-title>
                    <source>
						
                        <italic toggle="yes">Sci Rep.</italic>
					</source>
                    <year>2018</year>;<volume>8</volume>(<issue>1</issue>): 5210.
                    <pub-id pub-id-type="pmid">29581467</pub-id>
                    <pub-id pub-id-type="doi">10.1038/s41598-018-23534-9</pub-id>
                    <pub-id pub-id-type="pmcid">5980076</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-2">
                <label>2</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Liu</surname>
                            <given-names>Z</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Kuo</surname>
                            <given-names>PL</given-names>
                        </name>
							
                        <name name-style="western">
                            <surname>Horvath</surname>
                            <given-names>S</given-names>
                        </name>
						
                        <etal/>
					</person-group>:
                    <article-title>Phenotypic Age: a novel signature of mortality and morbidity risk.</article-title>
                    <source>
						
                        <italic toggle="yes">bioRxiv.</italic>
					</source>
                    <year>2018</year>: 363291.
                    <pub-id pub-id-type="doi">10.1101/363291</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-3">
                <label>3</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Mamoshina</surname>
                            <given-names>P</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Kochetov</surname>
                            <given-names>K</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Putin</surname>
                            <given-names>E</given-names>
                        </name>
						
                        <etal/>
					</person-group>:
                    <article-title>Population Specific Biomarkers of Human Aging: A Big Data Study Using South Korean, Canadian, and Eastern European Patient Populations.</article-title>
                    <source>
						
                        <italic toggle="yes">J Gerontol A Biol Sci Med Sci.</italic>
					</source>
                    <year>2018</year>;<volume>73</volume>(<issue>11</issue>):<fpage>1482</fpage>&#x2013;<lpage>1490</lpage>.
                    <pub-id pub-id-type="pmid">29340580</pub-id>
                    <pub-id pub-id-type="doi">10.1093/gerona/gly005</pub-id>
                    <pub-id pub-id-type="pmcid">6175034</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-4">
                <label>4</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Levine</surname>
                            <given-names>ME</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Lu</surname>
                            <given-names>AT</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Quach</surname>
                            <given-names>A</given-names>
                        </name>
						
                        <etal/>
					</person-group>:
                    <article-title>An epigenetic biomarker of aging for lifespan and healthspan.</article-title>
                    <source>
						
                        <italic toggle="yes">Aging (Albany NY).</italic>
					</source>
                    <year>2018</year>;<volume>10</volume>(<issue>4</issue>):<fpage>573</fpage>&#x2013;<lpage>91</lpage>.
                    <pub-id pub-id-type="pmid">29676998</pub-id>
                    <pub-id pub-id-type="doi">10.18632/aging.101414</pub-id>
                    <pub-id pub-id-type="pmcid">5940111</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-5">
                <label>5</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Feil</surname>
                            <given-names>R</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Fraga</surname>
                            <given-names>MF</given-names>
                        </name>
					</person-group>:
                    <article-title>Epigenetics and the environment: emerging patterns and implications.</article-title>
                    <source>
						
                        <italic toggle="yes">Nat Rev Genet.</italic>
					</source>
                    <year>2012</year>;<volume>13</volume>(<issue>2</issue>):<fpage>97</fpage>&#x2013;<lpage>109</lpage>.
                    <pub-id pub-id-type="pmid">22215131</pub-id>
                    <pub-id pub-id-type="doi">10.1038/nrg3142</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-6">
                <label>6</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Belsky</surname>
                            <given-names>DW</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Caspi</surname>
                            <given-names>A</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Houts</surname>
                            <given-names>R</given-names>
                        </name>
						
                        <etal/>
					</person-group>:
                    <article-title>Quantification of biological aging in young adults.</article-title>
                    <source>
						
                        <italic toggle="yes">Proc Natl Acad Sci U S A.</italic>
					</source>
                    <year>2015</year>;<volume>112</volume>(<issue>30</issue>):<fpage>E4104</fpage>&#x2013;<lpage>10</lpage>.
                    <pub-id pub-id-type="pmid">26150497</pub-id>
                    <pub-id pub-id-type="doi">10.1073/pnas.1506264112</pub-id>
                    <pub-id pub-id-type="pmcid">4522793</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-7">
                <label>7</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Montavon</surname>
                            <given-names>G</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Samek</surname>
                            <given-names>W</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>M&#x00fc;ller</surname>
                            <given-names>K-R</given-names>
                        </name>
					</person-group>:
                    <article-title>Methods for interpreting and understanding deep neural networks.</article-title>
                    <source>
						
                        <italic toggle="yes">Digital Signal Processing.</italic>
					</source>
                    <year>2018</year>;<volume>73</volume>:<fpage>1</fpage>&#x2013;<lpage>15</lpage>.
                    <pub-id pub-id-type="doi">10.1016/j.dsp.2017.10.011</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-8">
                <label>8</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Lundberg</surname>
                            <given-names>SM</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Nair</surname>
                            <given-names>B</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Vavilala</surname>
                            <given-names>MS</given-names>
                        </name>
						
                        <etal/>
					</person-group>:
                    <article-title>Explainable machine-learning predictions for the prevention of hypoxaemia during surgery.</article-title>
                    <source>
						
                        <italic toggle="yes">Nat Biomed Eng.</italic>
					</source>
                    <year>2018</year>;<volume>2</volume>:<fpage>749</fpage>&#x2013;<lpage>60</lpage>.
                    <pub-id pub-id-type="doi">10.1038/s41551-018-0304-0</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-9">
                <label>9</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Kelly</surname>
                            <given-names>C</given-names>
                        </name>
					</person-group>:
                    <article-title>cck197/ml-bio-age: Initial release (Version v1.0).</article-title>
                    <source>
						
                        <italic toggle="yes">Zenodo.</italic>
					</source>
                    <year>2018</year>.
                    <ext-link ext-link-type="uri" xlink:href="http://www.doi.org/10.5281/zenodo.2440203">http://www.doi.org/10.5281/zenodo.2440203</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref-10">
                <label>10</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Petursson</surname>
                            <given-names>H</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Sigurdsson</surname>
                            <given-names>JA</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Bengtsson</surname>
                            <given-names>C</given-names>
                        </name>
						
                        <etal/>
					</person-group>:
                    <article-title>Is the use of cholesterol in mortality risk algorithms in clinical guidelines valid? Ten years prospective data from the Norwegian HUNT 2 study.</article-title>
                    <source>
						
                        <italic toggle="yes">J Eval Clin Pract.</italic>
					</source>
                    <year>2012</year>;<volume>18</volume>(<issue>1</issue>):<fpage>159</fpage>&#x2013;<lpage>68</lpage>.
                    <pub-id pub-id-type="pmid">21951982</pub-id>
                    <pub-id pub-id-type="doi">10.1111/j.1365-2753.2011.01767.x</pub-id>
                    <pub-id pub-id-type="pmcid">3303886</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-11">
                <label>11</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Kunutsor</surname>
                            <given-names>SK</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Apekey</surname>
                            <given-names>TA</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Seddoh</surname>
                            <given-names>D</given-names>
                        </name>
						
                        <etal/>
					</person-group>:
                    <article-title>Liver enzymes and risk of all-cause mortality in general populations: a systematic review and meta-analysis.</article-title>
                    <source>
						
                        <italic toggle="yes">Int J Epidemiol.</italic>
					</source>
                    <year>2014</year>;<volume>43</volume>(<issue>1</issue>):<fpage>187</fpage>&#x2013;<lpage>201</lpage>.
                    <pub-id pub-id-type="pmid">24585856</pub-id>
                    <pub-id pub-id-type="doi">10.1093/ije/dyt192</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-12">
                <label>12</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Yi</surname>
                            <given-names>SW</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Park</surname>
                            <given-names>S</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Lee</surname>
                            <given-names>YH</given-names>
                        </name>
						
                        <etal/>
					</person-group>:
                    <article-title>Association between fasting glucose and all-cause mortality according to sex and age: a prospective cohort study.</article-title>
                    <source>
						
                        <italic toggle="yes">Sci Rep.</italic>
					</source>
                    <year>2017</year>;<volume>7</volume>(<issue>1</issue>):<fpage>8194</fpage>.
                    <pub-id pub-id-type="pmid">28811570</pub-id>
                    <pub-id pub-id-type="doi">10.1038/s41598-017-08498-6</pub-id>
                    <pub-id pub-id-type="pmcid">5557842</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-13">
                <label>13</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Bj&#x00f8;rnholt</surname>
                            <given-names>JV</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Erikssen</surname>
                            <given-names>G</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Aaser</surname>
                            <given-names>E</given-names>
                        </name>
						
                        <etal/>
					</person-group>:
                    <article-title>Fasting blood glucose: an underestimated risk factor for cardiovascular death. Results from a 22-year follow-up of healthy nondiabetic men.</article-title>
                    <source>
						
                        <italic toggle="yes">Diabetes Care.</italic>
					</source>
                    <year>1999</year>;<volume>22</volume>(<issue>1</issue>):<fpage>45</fpage>&#x2013;<lpage>9</lpage>.
                    <pub-id pub-id-type="pmid">10333902</pub-id>
                    <pub-id pub-id-type="doi">10.2337/diacare.22.1.45</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-14">
                <label>14</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Fulks</surname>
                            <given-names>M</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Stout</surname>
                            <given-names>RL</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Dolan</surname>
                            <given-names>VF</given-names>
                        </name>
					</person-group>:
                    <article-title>Albumin and all-cause mortality risk in insurance applicants.</article-title>
                    <source>
						
                        <italic toggle="yes">J Insur Med.</italic>
					</source>
                    <year>2010</year>;<volume>42</volume>(<issue>1</issue>):<fpage>11</fpage>&#x2013;<lpage>7</lpage>.
                    <pub-id pub-id-type="pmid">21290995</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-15">
                <label>15</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Zurauskaite</surname>
                            <given-names>G</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Meier</surname>
                            <given-names>M</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Voegeli</surname>
                            <given-names>A</given-names>
                        </name>
						
                        <etal/>
					</person-group>:
                    <article-title>Biological pathways underlying the association of red cell distribution width and adverse clinical outcome: Results of a prospective cohort study.</article-title>
                    <source>
						
                        <italic toggle="yes">PLoS One.</italic>
					</source>
                    <year>2018</year>;<volume>13</volume>(<issue>1</issue>):<fpage>e0191280</fpage>.
                    <pub-id pub-id-type="pmid">29342203</pub-id>
                    <pub-id pub-id-type="doi">10.1371/journal.pone.0191280</pub-id>
                    <pub-id pub-id-type="pmcid">5771602</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-16">
                <label>16</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Kadoglou</surname>
                            <given-names>NPE</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Biddulph</surname>
                            <given-names>JP</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Rafnsson</surname>
                            <given-names>SB</given-names>
                        </name>
						
                        <etal/>
					</person-group>:
                    <article-title>The association of ferritin with cardiovascular and all-cause mortality in community-dwellers: The English longitudinal study of ageing.</article-title>
                    <source>
						
                        <italic toggle="yes">PLoS One.</italic>
					</source>
                    <year>2017</year>;<volume>12</volume>(<issue>6</issue>):<fpage>e0178994</fpage>.
                    <pub-id pub-id-type="pmid">28591160</pub-id>
                    <pub-id pub-id-type="doi">10.1371/journal.pone.0178994</pub-id>
                    <pub-id pub-id-type="pmcid">5462410</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-17">
                <label>17</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Kolb</surname>
                            <given-names>H</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Martin</surname>
                            <given-names>S</given-names>
                        </name>
					</person-group>:
                    <article-title>Environmental/lifestyle factors in the pathogenesis and prevention of type 2 diabetes.</article-title>
                    <source>
						
                        <italic toggle="yes">BMC Med.</italic>
					</source>
                    <year>2017</year>;<volume>15</volume>(<issue>1</issue>):<fpage>131</fpage>.
                    <pub-id pub-id-type="pmid">28720102</pub-id>
                    <pub-id pub-id-type="doi">10.1186/s12916-017-0901-x</pub-id>
                    <pub-id pub-id-type="pmcid">5516328</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-18">
                <label>18</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Kelley</surname>
                            <given-names>GA</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Kelley</surname>
                            <given-names>KS</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Roberts</surname>
                            <given-names>S</given-names>
                        </name>
						
                        <etal/>
					</person-group>:
                    <article-title>Comparison of aerobic exercise, diet or both on lipids and lipoproteins in adults: a meta-analysis of randomized controlled trials.</article-title>
                    <source>
						
                        <italic toggle="yes">Clin Nutr.</italic>
					</source>
                    <year>2012</year>;<volume>31</volume>(<issue>2</issue>):<fpage>156</fpage>&#x2013;<lpage>67</lpage>.
                    <pub-id pub-id-type="pmid">22154987</pub-id>
                    <pub-id pub-id-type="doi">10.1016/j.clnu.2011.11.011</pub-id>
                    <pub-id pub-id-type="pmcid">3311746</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-19">
                <label>19</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Aslinia</surname>
                            <given-names>F</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Mazza</surname>
                            <given-names>JJ</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Yale</surname>
                            <given-names>SH</given-names>
                        </name>
					</person-group>:
                    <article-title>Megaloblastic anemia and other causes of macrocytosis.</article-title>
                    <source>
						
                        <italic toggle="yes">Clin Med Res.</italic>
					</source>
                    <year>2006</year>;<volume>4</volume>(<issue>3</issue>):<fpage>236</fpage>&#x2013;<lpage>41</lpage>.
                    <pub-id pub-id-type="pmid">16988104</pub-id>
                    <pub-id pub-id-type="doi">10.3121/cmr.4.3.236</pub-id>
                    <pub-id pub-id-type="pmcid">1570488</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-20">
                <label>20</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Biagi</surname>
                            <given-names>E</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Franceschi</surname>
                            <given-names>C</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Rampelli</surname>
                            <given-names>S</given-names>
                        </name>
						
                        <etal/>
					</person-group>:
                    <article-title>Gut Microbiota and Extreme Longevity.</article-title>
                    <source>
						
                        <italic toggle="yes">Curr Biol.</italic>
					</source>
                    <year>2016</year>;<volume>26</volume>(<issue>11</issue>):<fpage>1480</fpage>&#x2013;<lpage>5</lpage>.
                    <pub-id pub-id-type="pmid">27185560</pub-id>
                    <pub-id pub-id-type="doi">10.1016/j.cub.2016.04.016</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-21">
                <label>21</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Govindaraju</surname>
                            <given-names>D</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Atzmon</surname>
                            <given-names>G</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Barzilai</surname>
                            <given-names>N</given-names>
                        </name>
					</person-group>:
                    <article-title>Genetics, lifestyle and longevity: Lessons from centenarians.</article-title>
                    <source>
						
                        <italic toggle="yes">Appl Transl Genom.</italic>
					</source>
                    <year>2015</year>;<volume>4</volume>:<fpage>23</fpage>&#x2013;<lpage>32</lpage>.
                    <pub-id pub-id-type="pmid">26937346</pub-id>
                    <pub-id pub-id-type="doi">10.1016/j.atg.2015.01.001</pub-id>
                    <pub-id pub-id-type="pmcid">4745363</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-22">
                <label>22</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Benjamin</surname>
                            <given-names>EJ</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Blaha</surname>
                            <given-names>MJ</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Chiuve</surname>
                            <given-names>SE</given-names>
                        </name>
						
                        <etal/>
					</person-group>:
                    <article-title>Heart Disease and Stroke Statistics-2017 Update: A Report From the American Heart Association.</article-title>
                    <source>
						
                        <italic toggle="yes">Circulation.</italic>
					</source>
                    <year>2017</year>;<volume>135</volume>(<issue>10</issue>):<fpage>e146</fpage>&#x2013;<lpage>e603</lpage>.
                    <pub-id pub-id-type="pmid">28122885</pub-id>
                    <pub-id pub-id-type="doi">10.1161/CIR.0000000000000485</pub-id>
                    <pub-id pub-id-type="pmcid">5408160</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref-23">
                <label>23</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">
						
                        <name name-style="western">
                            <surname>Bhupathiraju</surname>
                            <given-names>SN</given-names>
                        </name>
						
                        <name name-style="western">
                            <surname>Hu</surname>
                            <given-names>FB</given-names>
                        </name>
					</person-group>:
                    <article-title>Epidemiology of Obesity and Diabetes and Their Cardiovascular Complications.</article-title>
                    <source>
						
                        <italic toggle="yes">Circ Res.</italic>
					</source>
                    <year>2016</year>;<volume>118</volume>(<issue>11</issue>):<fpage>1723</fpage>&#x2013;<lpage>35</lpage>.
                    <pub-id pub-id-type="pmid">27230638</pub-id>
                    <pub-id pub-id-type="doi">10.1161/CIRCRESAHA.115.306825</pub-id>
                    <pub-id pub-id-type="pmcid">4887150</pub-id>
                </mixed-citation>
            </ref>
        </ref-list>
    </back>
    <sub-article article-type="reviewer-report" id="report43612">
        <front-stub>
            <article-id pub-id-type="doi">10.5256/f1000research.19198.r43612</article-id>
            <title-group>
                <article-title>Reviewer response for version 1</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author">
                    <name>
                        <surname>Fedichev</surname>
                        <given-names>Peter O.</given-names>
                    </name>
                    <xref ref-type="aff" rid="r43612a1">1</xref>
                    <xref ref-type="aff" rid="r43612a2">2</xref>
                    <role>Referee</role>
                    <uri content-type="orcid">https://orcid.org/0000-0003-0404-808X</uri>
                </contrib>
                <aff id="r43612a1">
                    <label>1</label>Gero LLC, Singapore, Singapore</aff>
                <aff id="r43612a2">
                    <label>2</label>Moscow Institute of Physics and Technology, Moscow Region, Russian Federation</aff>
            </contrib-group>
            <author-notes>
                <fn fn-type="conflict">
                    <p>
                        <bold>Competing interests: </bold>PF is a founder and an employee of Gero LLC; the company is involved in the development and commercialization of biomarkers of aging.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>12</day>
                <month>2</month>
                <year>2019</year>
            </pub-date>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2019 Fedichev PO</copyright-statement>
                <copyright-year>2019</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access peer review report distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <related-article ext-link-type="doi" id="relatedArticleReport43612" related-article-type="peer-reviewed-article" xlink:href="10.12688/f1000research.17555.1"/>
            <custom-meta-group>
                <custom-meta>
                    <meta-name>recommendation</meta-name>
                    <meta-value>approve-with-reservations</meta-value>
                </custom-meta>
            </custom-meta-group>
        </front-stub>
        <body>
            <p>The manuscript concerns quantification of aging by means of a biological age (BA) model trained as a predictor of chronological age from the widely available blood markers (complete blood cell counts and biochemistry). Understanding capabilities and the biology behind such biomarkers are among the key issues in fundamental aging studies and could be very helpful for practical applications.&#x00a0;</p>
            <p> </p>
            <p> The manuscript, however, falls short of providing the necessary characterization of the proposed BA model. I believe that the presentation could be improved by addressing the issues listed below so that the results of the study could be eventually indexed in a revised form.&#x00a0;</p>
            <p> </p>
            <p> The authors introduced and documented the performance of the particular ML pipeline (XGBoost flavor of decision tree algorithms) trained to predict the chronological age from the blood markers provided by the National Health and Nutrition Examination Survey (NHANES). The rationalizations behind the approach were two-fold. First, the biological age predictor could be (at least according to previous studies) associated with all-cause and disease-specific mortality. Second, the proposed algorithm could produce a better interpretation of the biological age model output in a form, eventually suitable for personalized recommendations.&#x00a0;</p>
            <p> </p>
            <p> Unfortunately, the results presented in the manuscript are not sufficient to fully judge the merits of the model.</p>
            <p> </p>
            <p> Major issues: 
                <list list-type="order">
                    <list-item>
                        <p>This is not the first work concerning the biological age estimation from the blood markers in general or in NHANES in particular. I would expect more references to previous work and different machine learning techniques (from principal components analysis to deep learning). I would take a log-linear mortality model from Levine 2018
                            <sup>
                                <xref ref-type="bibr" rid="rep-ref-43612-1">1</xref>
                            </sup> and a deep learning model from Putin 2016
                            <sup>
                                <xref ref-type="bibr" rid="rep-ref-43612-5">5</xref>
                            </sup>&#x00a0;as state-of-the-art modern implementations.</p>
                    </list-item>
                    <list-item>
                        <p>The results should be compared with a reference model. I would not expect anything sophisticated, but there must be a comparison. For example, would the novel XGBoost method perform better than a linear regression to chronological age?&#x00a0;</p>
                    </list-item>
                    <list-item>
                        <p>What is the correct measure of the model's performance? A biological age should not be judged by the quality of the chronological age prediction only. It has been shown that improvements in the accuracy of this class of BA models may lead to a degradation of the association with chronic diseases and mortality (Levine 2018
                            <sup>
                                <xref ref-type="bibr" rid="rep-ref-43612-1">1</xref>
                            </sup>, Pyrkov 2018a
                            <sup>
                                <xref ref-type="bibr" rid="rep-ref-43612-2">2</xref>
                            </sup>). The open access part of the NHANES database contains enough death events and clinical diagnoses. I propose to demonstrate how strongly the proposed BA is associated with the remaining lifespan (Cox-regression significance test). Is there an association of the biological age (after adjustment for age and sex) with lifestyles (such as smoking, see Pyrkov 2018b
                            <sup>
                                <xref ref-type="bibr" rid="rep-ref-43612-3">3</xref>
                            </sup>, Mamoshina 2019
                            <sup>
                                <xref ref-type="bibr" rid="rep-ref-43612-4">4</xref>
                            </sup>, etc). Are the effects of smoking reversible in cohorts of individuals, who quit smoking (see Pyrkov 2018b
                            <sup>
                                <xref ref-type="bibr" rid="rep-ref-43612-3">3</xref>
                            </sup>)? What is the aging acceleration in years associated with smoking (see Mamoshina 2019
                            <sup>
                                <xref ref-type="bibr" rid="rep-ref-43612-4">4</xref>
                            </sup>)? How is it related to the actual lifespan depreciation associated with smoking? Is the biological age associated with chronic diseases?</p>
                    </list-item>
                    <list-item>
                        <p>A linear model, such as a (regularized) regression to age, a log-linear proportional hazard model, would also provide the biological age estimation with contributions associated with the specific markers. Without comparison with a reference linear model, it would be difficult to argue that a more sophisticated approach is easier to interpret.&#x00a0;</p>
                    </list-item>
                </list> Let me list a number of minor points, recommendations for the discussion (not necessarily calculations!): 
                <list list-type="order">
                    <list-item>
                        <p>It would be reasonable to discuss hyperparameters involved in the XGBoost model tuning. How were those parameters selected?</p>
                    </list-item>
                    <list-item>
                        <p>There is a log-linear proportional hazard model predicting mortality in NHANES (Levine 2018
                            <sup>
                                <xref ref-type="bibr" rid="rep-ref-43612-1">1</xref>
                            </sup>). Is&#x00a0;there a way to see if the XGBoost model is better? Is it possible to produce a prophetic statement? Could the authors speculate if their model is more or less statistically powerful than the phenoage?</p>
                    </list-item>
                    <list-item>
                        <p>In the authors' opinion, what are advantages or disadvantages of XGBoost over deep learning models, such as Zhavoronkov?</p>
                    </list-item>
                    <list-item>
                        <p>Is there a way to improve the biological age assessment with XGBoost in combination with proportional hazards models?</p>
                    </list-item>
                </list>
            </p>
            <p>Is the work clearly and accurately presented and does it cite the current literature?</p>
            <p>Partly</p>
            <p>If applicable, is the statistical analysis and its interpretation appropriate?</p>
            <p>Yes</p>
            <p>Are all the source data underlying the results available to ensure full reproducibility?</p>
            <p>Yes</p>
            <p>Is the study design appropriate and is the work technically sound?</p>
            <p>Partly</p>
            <p>Are the conclusions drawn adequately supported by the results?</p>
            <p>Yes</p>
            <p>Are sufficient details of methods and analysis provided to allow replication by others?</p>
            <p>Yes</p>
            <p>Reviewer Expertise:</p>
            <p>aging research, biomarkers of aging, theory of aging, aging therapeutics</p>
            <p>I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard, however I have significant reservations, as outlined above.</p>
        </body>
        <back>
            <ref-list>
                <title>References</title>
                <ref id="rep-ref-43612-1">
                    <label>1</label>
                    <mixed-citation publication-type="journal">
                        <person-group person-group-type="author"/>:
                        <article-title>An epigenetic biomarker of aging for lifespan and healthspan</article-title>.
                        <source>
                            <italic>Aging</italic>
                        </source>.<year>2018</year>;<volume>10</volume>(<issue>4</issue>) :
                        <elocation-id>10.18632/aging.101414</elocation-id>
                        <fpage>573</fpage>-<lpage>591</lpage>
                        <pub-id pub-id-type="doi">10.18632/aging.101414</pub-id>
                    </mixed-citation>
                </ref>
                <ref id="rep-ref-43612-2">
                    <label>2</label>
                    <mixed-citation publication-type="journal">
                        <person-group person-group-type="author"/>:
                        <article-title>Extracting biological age from biomedical data via deep learning: too much of a good thing?</article-title>.
                        <source>
                            <italic>Sci Rep</italic>
                        </source>.<year>2018</year>;<volume>8</volume>(<issue>1</issue>) :
                        <elocation-id>10.1038/s41598-018-23534-9</elocation-id>
                        <fpage>5210</fpage>
                        <pub-id pub-id-type="pmid">29581467</pub-id>
                        <pub-id pub-id-type="doi">10.1038/s41598-018-23534-9</pub-id>
                    </mixed-citation>
                </ref>
                <ref id="rep-ref-43612-3">
                    <label>3</label>
                    <mixed-citation publication-type="journal">
                        <person-group person-group-type="author"/>:
                        <article-title>Quantitative characterization of biological age and frailty based on locomotor activity records.</article-title>
                        <source>
                            <italic>Aging (Albany NY)</italic>
                        </source>.<year>2018</year>;<volume>10</volume>(<issue>10</issue>) :
                        <elocation-id>10.18632/aging.101603</elocation-id>
                        <fpage>2973</fpage>-<lpage>2990</lpage>
                        <pub-id pub-id-type="pmid">30362959</pub-id>
                        <pub-id pub-id-type="doi">10.18632/aging.101603</pub-id>
                    </mixed-citation>
                </ref>
                <ref id="rep-ref-43612-4">
                    <label>4</label>
                    <mixed-citation publication-type="journal">
                        <person-group person-group-type="author"/>:
                        <article-title>Blood Biochemistry Analysis to Detect Smoking Status and Quantify Accelerated Aging in Smokers.</article-title>
                        <source>
                            <italic>Sci Rep</italic>
                        </source>.<year>2019</year>;<volume>9</volume>(<issue>1</issue>) :
                        <elocation-id>10.1038/s41598-018-35704-w</elocation-id>
                        <fpage>142</fpage>
                        <pub-id pub-id-type="pmid">30644411</pub-id>
                        <pub-id pub-id-type="doi">10.1038/s41598-018-35704-w</pub-id>
                    </mixed-citation>
                </ref>
                <ref id="rep-ref-43612-5">
                    <label>5</label>
                    <mixed-citation publication-type="journal">
                        <person-group person-group-type="author"/>:
                        <article-title>Deep biomarkers of human aging: Application of deep neural networks to biomarker development.</article-title>
                        <source>
                            <italic>Aging (Albany NY)</italic>
                        </source>.<year>2016</year>;<volume>8</volume>(<issue>5</issue>) :
                        <elocation-id>10.18632/aging.100968</elocation-id>
                        <fpage>1021</fpage>-<lpage>33</lpage>
                        <pub-id pub-id-type="pmid">27191382</pub-id>
                        <pub-id pub-id-type="doi">10.18632/aging.100968</pub-id>
                    </mixed-citation>
                </ref>
            </ref-list>
        </back>
    </sub-article>
    <sub-article article-type="reviewer-report" id="report42590">
        <front-stub>
            <article-id pub-id-type="doi">10.5256/f1000research.19198.r42590</article-id>
            <title-group>
                <article-title>Reviewer response for version 1</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author">
                    <name>
                        <surname>Zhavoronkov</surname>
                        <given-names>Alex</given-names>
                    </name>
                    <xref ref-type="aff" rid="r42590a1">1</xref>
                    <role>Referee</role>
                    <uri content-type="orcid">https://orcid.org/0000-0001-7067-8966</uri>
                </contrib>
                <aff id="r42590a1">
                    <label>1</label>Insilico Medicine, Inc., Baltimore, MD, USA</aff>
            </contrib-group>
            <author-notes>
                <fn fn-type="conflict">
                    <p>
                        <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>15</day>
                <month>1</month>
                <year>2019</year>
            </pub-date>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2019 Zhavoronkov A</copyright-statement>
                <copyright-year>2019</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access peer review report distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <related-article ext-link-type="doi" id="relatedArticleReport42590" related-article-type="peer-reviewed-article" xlink:href="10.12688/f1000research.17555.1"/>
            <custom-meta-group>
                <custom-meta>
                    <meta-name>recommendation</meta-name>
                    <meta-value>approve-with-reservations</meta-value>
                </custom-meta>
            </custom-meta-group>
        </front-stub>
        <body>
            <p>While the study is not novel and the technical sophistication is considerably low, the study addresses one of the most important challenges in biomedicine and I recommend accepting it if the authors agree to make substantial improvements to the manuscript, try out the other machine learning methods, research the prior art, and expand the methodology.</p>
            <p> </p>
            <p> Firstly, the study does not provide an overview of the other interpretable biomarkers of aging developed using the multiple data types. Some of the prior clocks are described here: Zhavoronkov 
                <italic>et al</italic>.
                <sup>
                    <xref ref-type="bibr" rid="rep-ref-42590-1">1</xref>
                </sup>. It is very similar to the study published in 2016 (
                <italic>Putin et al</italic>.
                <sup>
                    <xref ref-type="bibr" rid="rep-ref-42590-2">2</xref>
                </sup>)&#x00a0;which not only introduced the concept but also provided a comparison with the other machine learning methods including GBM, RF, DT, LR, kNN, ElasticNet, SVM and DNNs and an online testing platform for the hematological aging clocks.&#x00a0;</p>
            <p> </p>
            <p> All of these machine learning methods allow for the various feature selection and feature importance techniques that provide very different results and pick the most important features differently. This paper explains the differences in how the different machine learning techniques prioritize different genes using the transcriptomic age predictor (Mamoshina 
                <italic>et al</italic>.
                <sup>
                    <xref ref-type="bibr" rid="rep-ref-42590-3">3</xref>
                </sup>). This is not a recommendation for citing these papers but an example of the work that needs to be done.</p>
            <p> As it stands, the study looks like a student machine learning data processing exercise and application of an out-of-the-box Python library on the NHANES dataset rather than a complete research paper. The conclusion that the SHAP library is a good tool for interpreting the results from a machine learning model is not surprising at all. The paper can hardly be called a methodological paper because it lacks novelty of both methods of age prediction and comparison with classical methods of age prediction using a common blood test.</p>
            <p> </p>
            <p> There is a number of issues I noticed that need to be addressed: 
                <list list-type="order">
                    <list-item>
                        <p>The paper is lacking the information on how the train and test set were selected along with the age by sex distribution. Was the training and optimization of models performed without cross-validation? At the same time, NHANES data also contains people with various conditions including diabetes and kidney disease. Were those individuals excluded from the training process? These important questions are not clear from the paper and need to be clarified.</p>
                    </list-item>
                    <list-item>
                        <p>Related to comment #1: how does the model perform&#x00a0;on individuals with chronic diseases?</p>
                    </list-item>
                    <list-item>
                        <p>It is not clear why the predicted age is referred to as &#x2018;biological age&#x2019;. Biological age should be predictive of mortality. The observed difference between predicted and actual age should be associated with outcome in terms of morbidity or mortality. This should be explored in detail with respect to the interpretation of the age predictor results. NHANES data has information about mortality that can be used for this type of analysis. At this point, the analysis suggests that selected blood parameters are associated with age and so predictive of chronological age. This type of analysis was performed in one of the referenced papers utilizing the NHANES dataset but not in this paper. It needs to be performed in order for the paper to be published.</p>
                    </list-item>
                    <list-item>
                        <p>The baseline is lacking. What would the performance be if you predict all samples as a median age for the population? Would it be higher or would it be the same as the test set error?</p>
                    </list-item>
                    <list-item>
                        <p>In line with the above comments, because the performance evaluation is not rigorous and no hyperparameter selection was performed, it is not clear why this age prediction method was selected. One of the commonly used and extensively validated models is Klemera and Doubal. (Klemera P, Doubal S. A new approach to the concept and computation of biological age 
                            <sup>
                                <xref ref-type="bibr" rid="rep-ref-42590-4">4</xref>
                            </sup>). I would suggest exploring the KD age prediction model in terms of interoperability of the blood test markers. Would the machine learning model be better? If so, why?</p>
                    </list-item>
                    <list-item>
                        <p>As mentioned above, there is no baseline model, comparison of different models or hyperparameter tuning. Without the interpretation of the difference between the predicted and actual chronological age (association with mortality or diseases for example), this difference is just an error of the model. How would this error of the model affect the results? Would the results change if the model is trained on samples that were initially predicted accurately? What about the samples predicted with a greater error? This needs to be explored.</p>
                    </list-item>
                    <list-item>
                        <p>Related to the point, age distribution plots of those randomly selected samples are needed. How would different age groups contribute to the results?</p>
                    </list-item>
                    <list-item>
                        <p>Instead of using k-fold cross-validation, the authors used just a random 80/20 train/test split, so the results presented in Figure 2 (SHAP summary plots) cannot be interpreted as stable. E.g., for men the first 5 biomarkers are very similar in terms of importance for age prediction, so the order of these five biomarkers will probably change with a different random data split.</p>
                    </list-item>
                    <list-item>
                        <p>Preprocessing is rather scarce. E.g. outlier analysis was not provided. Were they excluded from the analysis? If not, why, and how would they contribute to the SHAP summary plots?</p>
                    </list-item>
                    <list-item>
                        <p>I would like to see the comparison of the estimated reference ranges with commonly accepted reference ranges.</p>
                    </list-item>
                    <list-item>
                        <p>A linear fit line in Figure 1 is barely visible because dots and line are plotted using the same color.</p>
                    </list-item>
                    <list-item>
                        <p>It is always a good practice to provide figures optimized for color-blind readers. Figure 2 colors are hardly distinguishable.</p>
                    </list-item>
                    <list-item>
                        <p>Figure 3 is lacking the actual chronological age of the individual analyzed.</p>
                    </list-item>
                </list> My recommendation is to address these points and explore the prior art. Biological age prediction using machine learning is a very interesting and important field and the studies need to be consistent and comparable.</p>
            <p>Is the work clearly and accurately presented and does it cite the current literature?</p>
            <p>No</p>
            <p>If applicable, is the statistical analysis and its interpretation appropriate?</p>
            <p>Partly</p>
            <p>Are all the source data underlying the results available to ensure full reproducibility?</p>
            <p>Yes</p>
            <p>Is the study design appropriate and is the work technically sound?</p>
            <p>No</p>
            <p>Are the conclusions drawn adequately supported by the results?</p>
            <p>No</p>
            <p>Are sufficient details of methods and analysis provided to allow replication by others?</p>
            <p>Yes</p>
            <p>Reviewer Expertise:</p>
            <p>aging research, machine learning</p>
            <p>I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard, however I have significant reservations, as outlined above.</p>
        </body>
        <back>
            <ref-list>
                <title>References</title>
                <ref id="rep-ref-42590-1">
                    <label>1</label>
                    <mixed-citation publication-type="journal">
                        <person-group person-group-type="author"/>:
                        <article-title>Artificial intelligence for aging and longevity research: Recent advances and perspectives.</article-title>
                        <source>
                            <italic>Ageing Res Rev</italic>
                        </source>.<year>2019</year>;<volume>49</volume>:
                        <elocation-id>10.1016/j.arr.2018.11.003</elocation-id>
                        <fpage>49</fpage>-<lpage>66</lpage>
                        <pub-id pub-id-type="pmid">30472217</pub-id>
                        <pub-id pub-id-type="doi">10.1016/j.arr.2018.11.003</pub-id>
                    </mixed-citation>
                </ref>
                <ref id="rep-ref-42590-2">
                    <label>2</label>
                    <mixed-citation publication-type="journal">
                        <person-group person-group-type="author"/>:
                        <article-title>Deep biomarkers of human aging: Application of deep neural networks to biomarker development.</article-title>
                        <source>
                            <italic>Aging (Albany NY)</italic>
                        </source>.<year>2016</year>;<volume>8</volume>(<issue>5</issue>) :
                        <elocation-id>10.18632/aging.100968</elocation-id>
                        <fpage>1021</fpage>-<lpage>33</lpage>
                        <pub-id pub-id-type="pmid">27191382</pub-id>
                        <pub-id pub-id-type="doi">10.18632/aging.100968</pub-id>
                    </mixed-citation>
                </ref>
                <ref id="rep-ref-42590-3">
                    <label>3</label>
                    <mixed-citation publication-type="journal">
                        <person-group person-group-type="author"/>:
                        <article-title>Machine Learning on Human Muscle Transcriptomic Data for Biomarker Discovery and Tissue-Specific Drug Target Identification.</article-title>
                        <source>
                            <italic>Front Genet</italic>
                        </source>.<year>2018</year>;<volume>9</volume>:
                        <elocation-id>10.3389/fgene.2018.00242</elocation-id>
                        <fpage>242</fpage>
                        <pub-id pub-id-type="pmid">30050560</pub-id>
                        <pub-id pub-id-type="doi">10.3389/fgene.2018.00242</pub-id>
                    </mixed-citation>
                </ref>
                <ref id="rep-ref-42590-4">
                    <label>4</label>
                    <mixed-citation publication-type="journal">
                        <person-group person-group-type="author"/>:
                        <article-title>A new approach to the concept and computation of biological age.</article-title>
                        <source>
                            <italic>Mech Ageing Dev</italic>
                        </source>.<year>2006</year>;<volume>127</volume>(<issue>3</issue>) :
                        <elocation-id>10.1016/j.mad.2005.10.004</elocation-id>
                        <fpage>240</fpage>-<lpage>8</lpage>
                        <pub-id pub-id-type="pmid">16318865</pub-id>
                        <pub-id pub-id-type="doi">10.1016/j.mad.2005.10.004</pub-id>
                    </mixed-citation>
                </ref>
            </ref-list>
        </back>
    </sub-article>
</article>
