<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.2 20190208//EN" "http://jats.nlm.nih.gov/publishing/1.2/JATS-journalpublishing1.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="1.2" xml:lang="en">
    <front>
        <journal-meta>
            <journal-id journal-id-type="pmc">F1000Research</journal-id>
            <journal-title-group>
                <journal-title>F1000Research</journal-title>
            </journal-title-group>
            <issn pub-type="epub">2046-1402</issn>
            <publisher>
                <publisher-name>F1000 Research Limited</publisher-name>
                <publisher-loc>London, UK</publisher-loc>
            </publisher>
        </journal-meta>
        <article-meta>
            <article-id pub-id-type="doi">10.12688/f1000research.142428.1</article-id>
            <article-categories>
                <subj-group subj-group-type="heading">
                    <subject>Research Article</subject>
                </subj-group>
                <subj-group>
                    <subject>Articles</subject>
                </subj-group>
            </article-categories>
            <title-group>
                <article-title>Evaluation of accuracy and potential harm of ChatGPT in medical nutrition therapy - a case-based approach</article-title>
                <fn-group content-type="pub-status">
                    <fn>
                        <p>[version 1; peer review: 1 approved with reservations, 1 not approved]</p>
                    </fn>
                </fn-group>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author" corresp="yes">
                    <name>
                        <surname>Mishra</surname>
                        <given-names>Vinaytosh</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Conceptualization</role>
                    <role content-type="http://credit.niso.org/">Formal Analysis</role>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <role content-type="http://credit.niso.org/">Project Administration</role>
                    <uri content-type="orcid">https://orcid.org/0000-0002-6360-910X</uri>
                    <xref ref-type="corresp" rid="c1">a</xref>
                    <xref ref-type="aff" rid="a1">1</xref>
                    <xref ref-type="aff" rid="a2">2</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Jafri</surname>
                        <given-names>Fahmida</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Data Curation</role>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <xref ref-type="aff" rid="a2">2</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Abdul Kareem</surname>
                        <given-names>Nafeesa</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Data Curation</role>
                    <role content-type="http://credit.niso.org/">Formal Analysis</role>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <uri content-type="orcid">https://orcid.org/0000-0002-9199-3049</uri>
                    <xref ref-type="aff" rid="a2">2</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Aboobacker</surname>
                        <given-names>Raseena</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Data Curation</role>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a2">2</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Noora</surname>
                        <given-names>Fatma</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Data Curation</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Original Draft Preparation</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <uri content-type="orcid">https://orcid.org/0000-0001-8180-8770</uri>
                    <xref ref-type="aff" rid="a2">2</xref>
                </contrib>
                <aff id="a1">
                    <label>1</label>Datta Meghe Institute of Higher Education and Research, Nagpur, Maharashtra, India</aff>
                <aff id="a2">
                    <label>2</label>Gulf Medical University, Ajman, UAE, Ajman, United Arab Emirates</aff>
            </contrib-group>
            <author-notes>
                <corresp id="c1">
                    <label>a</label>
                    <email xlink:href="mailto:vinaytosh@gmail.com">vinaytosh@gmail.com</email>
                </corresp>
                <fn fn-type="conflict">
                    <p>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>22</day>
                <month>2</month>
                <year>2024</year>
            </pub-date>
            <pub-date pub-type="collection">
                <year>2024</year>
            </pub-date>
            <volume>13</volume>
            <elocation-id>137</elocation-id>
            <history>
                <date date-type="accepted">
                    <day>14</day>
                    <month>11</month>
                    <year>2023</year>
                </date>
            </history>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2024 Mishra V et al.</copyright-statement>
                <copyright-year>2024</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access article distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <self-uri content-type="pdf" xlink:href="https://f1000research.com/articles/13-137/pdf"/>
            <abstract>
                <sec>
                    <title>Background</title>
                    <p>ChatGPT is a conversational large language model (LLM) based on artificial intelligence (AI). LLMs may be applied in health care education, research, and practice if relevant valid concerns are proactively addressed. The current study aimed to investigate ChatGPT&#x2019;s ability to generate accurate and comprehensive responses to nutritional queries created by nutritionists/dieticians.</p>
                </sec>
                <sec>
                    <title>Methods</title>
                    <p>An in-depth case study approach was used to accomplish the research objectives. Functional testing was performed, creating test cases based on the functional requirements of the software application. ChatGPT responses were evaluated and analyzed using various scenarios requiring medical nutritional therapy, which were created with varied complexity. Based on the accuracy of the generated data, which were evaluated by a registered nutritionist, a potential harm score for the responses from ChatGPT was used for evaluation.</p>
                </sec>
                <sec>
                    <title>Results</title>
                    <p>Eight case scenarios with varied complexity when evaluated revealed that, as the complexity of the scenario increased, it led to an increase in the risk potential. Although the accuracy of the generated response does not change much with the complexity of the case scenarios, the study suggests that ChatGPT should be avoided for generating responses for complex medical nutritional conditions or scenarios.</p>
                </sec>
                <sec>
                    <title>Conclusions</title>
                    <p>An initiative that engages all stakeholders involved in healthcare education, research, and practice is urgently needed to set up guidelines for the responsible use of ChatGPT by healthcare educators, researchers, and practitioners. The findings of the study are useful for healthcare professionals and health technology regulators.</p>
                </sec>
            </abstract>
            <kwd-group kwd-group-type="author">
                <kwd>Medical Nutrition Therapy</kwd>
                <kwd>Generative AI</kwd>
                <kwd>Large Language Models</kwd>
                <kwd>ChatGPT</kwd>
            </kwd-group>
            <funding-group>
                <funding-statement>The author(s) declared that no grants were involved in supporting this work.</funding-statement>
            </funding-group>
        </article-meta>
    </front>
    <body>
        <sec id="sec5" sec-type="intro">
            <title>Introduction</title>
            <p>Noncommunicable diseases (NCDs), which are also called chronic diseases, are long-lasting and occur because of a combination of factors including genetics, physiology, environment, and behavior.
                <sup>
                    <xref ref-type="bibr" rid="ref1">1</xref>
                </sup> The major categories of NCDs are known as chronic diseases, and they include cardiovascular diseases, which cause 17.9 million deaths every year across the globe. Cancers also contribute significantly to chronic disease, causing 9 million deaths annually. Additionally, chronic respiratory diseases result in 3.9 million deaths each year, and diabetes causes 1.6 million deaths per year.
                <sup>
                    <xref ref-type="bibr" rid="ref1">1</xref>
                </sup>
            </p>
            <p>The rising incidence of chronic illnesses is having a significant financial impact on healthcare systems worldwide, and it has attracted the interest and attention of policymakers and researchers at all levels of government.
                <sup>
                    <xref ref-type="bibr" rid="ref2">2</xref>
                </sup> Typically, the methods employed to manage chronic illnesses are multifaceted, and they revolve around dietary or nutritional interventions, consistent physical exercise, and lifestyle adjustments at their core.
                <sup>
                    <xref ref-type="bibr" rid="ref3">3</xref>
                </sup>
            </p>
            <p>Studies have demonstrated that low-glycemic index (GI) and low-carbohydrate diets are successful in treating type 2 diabetes, and there has been extensive research into the use of unsaturated fatty acids, vitamins, and bioactive compounds in the management of chronic diseases. Although multidimensional approaches are crucial in managing these chronic illnesses, dietary interventions are of paramount importance and occupy a significant role in these strategies.
                <sup>
                    <xref ref-type="bibr" rid="ref2">2</xref>
                </sup>
            </p>
            <p>A chatbot powered by artificial intelligence (AI), 
                <ext-link ext-link-type="uri" xlink:href="https://chat.openai.com/auth/login">ChatGPT</ext-link> (Chat Generative Pre-Trained Transformer), was launched by OpenAI in November 2022. With both supervised and reinforcement learning techniques, it is built on top of OpenAI&#x2019;s GPT-3.5 and GPT-4 large language models (LLMs).
                <sup>
                    <xref ref-type="bibr" rid="ref4">4</xref>
                </sup> By using a two-stage training process, large language models learn from data more efficiently than traditional deep learning models, as they begin self-supervised learning on huge amounts of unannotated data, then fine-tune their performance on smaller, task-specific, annotated datasets based on user specifications.
                <sup>
                    <xref ref-type="bibr" rid="ref5">5</xref>
                </sup>
            </p>
            <p>The original ChatGPT release was based on GPT-3.5 as the foundation, an LLM (Large Language Model) with over 175 billion parameters.
                <sup>
                    <xref ref-type="bibr" rid="ref6">6</xref>
                </sup> The newest OpenAI model, GPT-4 was released on March 14, 2023. It is important to note that ChatGPT&#x2019;s training data is derived from a wide range of online sources, including books, articles, and websites. Utilizing reinforcement learning from human feedback in conversational tasks,
                <sup>
                    <xref ref-type="bibr" rid="ref7">7</xref>
                </sup> ChatGPT can consider the complexity of users&#x2019; intentions to respond effectively to a variety of end-user tasks, such as medical queries.</p>
            <p>A growing amount of medical data and the complexity of clinical decision-making could theoretically benefit clinicians through NLP tools, allowing doctors to make timely, informed decisions. In addition, technological advancements have democratized knowledge, enabling patients to access medical information without relying solely on healthcare professionals. Instead, they are increasingly using search engines, and now artificial intelligence chatbots, to find medical information.
                <sup>
                    <xref ref-type="bibr" rid="ref8">8</xref>
                </sup>
            </p>
            <p>By engaging in conversational interactions, ChatGPT and other recent chatbots provide authoritative-sounding responses to complicated medical queries. Even though ChatGPT is a promising technology, it often produces inaccurate results, meaning caution is warranted when applying it to medical practice and research.
                <sup>
                    <xref ref-type="bibr" rid="ref9">9</xref>
                </sup>
                <sup>&#x2013;</sup>
                <sup>
                    <xref ref-type="bibr" rid="ref13">13</xref>
                </sup> These engines have not been evaluated for accuracy and reliability, especially in terms of open-ended medical questions that doctors and patients might ask.
                <sup>
                    <xref ref-type="bibr" rid="ref10">10</xref>
                </sup>
                <sup>&#x2013;</sup>
                <sup>
                    <xref ref-type="bibr" rid="ref12">12</xref>
                </sup>
            </p>
            <p>Our study aims to assess ChatGPT&#x2019;s ability to generate accurate and comprehensive responses to nutritional queries created by nutritionists/dieticians. In addition, this will provide an early indication of ChatGPT&#x2019;s reliability as a provider of accurate and complete information. Furthermore, this study will highlight limitations and propose an approach for addressing those.</p>
        </sec>
        <sec id="sec6" sec-type="methods">
            <title>Methods</title>
            <sec id="sec7">
                <title>Ethical considerations</title>
                <p>All participants gave written informed consent. Ethical approval was not required as the study had low risk to participants.</p>
            </sec>
            <sec id="sec8">
                <title>Study design</title>
                <p>The study uses a case study approach to achieve the research objectives stated in the earlier section. It provides rich and detailed data that can be used to gain a deep understanding of a particular case. It allows for the exploration of complex phenomena that cannot be easily studied through other research methods.
                    <sup>
                        <xref ref-type="bibr" rid="ref14">14</xref>
                    </sup> Although there are limitations to the case study method, it is one of the most useful tools in the exploratory study of abstract and evolving phenomena. The type of case study method utilized in this study is the illustrative case study.
                    <sup>
                        <xref ref-type="bibr" rid="ref15">15</xref>
                    </sup> The approach used in this study is borrowed from functional testing and quality assurance practices in software development. Functional testing involves creating test cases based on the functional requirements of the software application. These test cases are designed to evaluate whether the software performs as expected. Functional testing is typically performed using black box testing techniques, which means that the tester does not have access to the source code of the software application. In this case, ChatGPT acts as a black box for the researchers involved in this study.</p>
                <p>To evaluate the performance of ChatGPT in medical nutrition therapy, a well-defined study protocol was used. The steps followed in the study are as follows:</p>
                <p>
                    <bold>Step 1:</bold> Creation of questions (scenarios) of varied complexity by public health professionals. The questions were selected by the licensed medical nutrition therapist working in the UAE. The selected scenarios ranged from a simple diet consultation to patients with comorbid conditions.</p>
                <p>
                    <bold>Step 2:</bold> The response of ChatGPT was taken and recorded for further analysis.</p>
                <p>
                    <bold>Step 3:</bold> The responses from Step 2 were evaluated by a registered nutritionist for accuracy.</p>
                <p>
                    <bold>Step 4:</bold> Based on the accuracy, the potential-for-harm score for the response was created.</p>
                <p>
                    <bold>Step 5:</bold> Data was summarized and analyzed by the expert group used in Step 1.</p>
            </sec>
            <sec id="sec9">
                <title>Sample</title>
                <p>The expert group for deciding complexity contained five public health professionals working in the United Arab Emirates. The experts were selected from Gulf Medical University, UAE, and the method of selection was nonrandom purposive sampling. The inclusion criteria for the experts were a master&#x2019;s degree and clinical experience greater than five years. The researchers involved in this study approached seven healthcare professionals, of whom five agreed to be part of the expert group. The researchers wanted to recruit five to nine experts, as a number greater than that is difficult to handle and a number less than that may result in bias.</p>
                <p>For accuracy, one registered nutritionist&#x2019;s response was taken for Step 3. The nutritionist gave a score on an ordinal scale of one to ten, where one is the least accurate and ten is the most accurate.</p>
                <p>To ascertain the potential of harm in Step 4 all five experts discussed earlier worked together.</p>
                <p>The method utilized for reaching consensus was the Delphi method depicted in 
                    <xref ref-type="fig" rid="f1">Figure 1</xref>.
                    <sup>
                        <xref ref-type="bibr" rid="ref16">16</xref>
                    </sup> Using the steps mentioned above and the data provided in the support material, the reproducibility of the research can be established. Again, a scale of one to ten was used to ascertain the potential for harm, where one is the lowest and ten is the highest.</p>
                <fig fig-type="figure" id="f1" orientation="portrait" position="float">
                    <label>Figure 1. </label>
                    <caption>
                        <title>Approach for Delphi method used in the study.</title>
                        <p>Source: Author&#x2019;s Compilation.</p>
                    </caption>
                    <graphic id="gr1" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/155982/efcf7f3f-2289-403c-9956-46ec075c72ba_figure1.gif"/>
                </fig>
                <p>The Delphi method is a structured communication technique originally developed as a systematic, interactive forecasting method that relies on a panel of experts. The experts answered questionnaires in three rounds. After each round, a researcher (VM) provided an anonymous summary of the experts&#x2019; responses from the previous round as well as the reasons they provided for their judgments. Thus, the experts were encouraged to revise their earlier answers considering the replies of other members of their panel. It was observed that the range of the answers decreased, and the group converged. Finally, the process was stopped at a predefined stopping point, and the median scores of the final round determined the results (
                    <xref ref-type="fig" rid="f1">Figure 1</xref>).</p>
                <p>The conceptual definitions of the terms used in the study are as follows:</p>
                <p>
                    <italic toggle="yes">Clinical Accuracy:</italic> &#x201c;A clinical accuracy is a qualitative approach that describes the clinical outcome of basing a treatment decision on the result of a measurement method being evaluated&#x201d;.
                    <sup>
                        <xref ref-type="bibr" rid="ref17">17</xref>
                    </sup>
                </p>
                <p>
                    <italic toggle="yes">Complexity of the Clinical Problem:</italic> &#x201c;Clinical complexity is a protean term encompassing multiple levels and domains. Illustratively, a prominent concern in health care involves a multiplicity of disorders and conditions experienced by a person along with their cross-sectional and longitudinal contexts&#x201d;.
                    <sup>
                        <xref ref-type="bibr" rid="ref18">18</xref>
                    </sup>
                </p>
                <p>
                    <italic toggle="yes">Potential for Harm:</italic> &#x201c;Harm means an injury to the rights, safety or welfare of a research participant that may include physical, psychological, social, financial or economic factors&#x201d;.
                    <sup>
                        <xref ref-type="bibr" rid="ref19">19</xref>
                    </sup>
                </p>
            </sec>
        </sec>
        <sec id="sec10" sec-type="results|discussion">
            <title>Results &amp; discussion</title>
            <p>This section discusses the results obtained from the illustrative case study method described in the earlier section.</p>
            <sec id="sec11">
                <title>Case 1: 35-year-old female to reduce 10 kgs in a month</title>
                <p>The question is simple, with age, gender, and a weight loss goal provided. The statement emphasizes the importance of sustainable weight loss and the potential risks of rapid weight loss. The provided diet chart is low carb, high fiber/protein, suitable for the given condition. There is a negligible risk for a user following this diet unless they have comorbid conditions. The statement that a diet chart need not contain caloric information is not true, as it serves as a guideline for achieving a caloric deficit to aid in weight loss. In terms of the evaluation criteria, the statement receives a complexity score of 2, an accuracy score of 8, and a potential risk score of 1. This result suggests that in case of lower complexity the accuracy of the information is higher and potential risk is lower.</p>
            </sec>
            <sec id="sec12">
                <title>Case 2: 35-year-old female with BMI 34 to reduce weight</title>
                <p>The question is slightly more complex than the previous one, with the addition of BMI information. For a person with a BMI of 34, a calorie-deficit diet is required for weight loss. The diet, however, does not specify the amount of oil to be consumed, which can significantly increase the calorie count. The diet is not specific, and portions are assumed, which may result in a diet of around 1400-1500 calories, which may not be enough to achieve the target weight loss. A layperson following this guide may not achieve their weight loss target as the diet provided is not guided. In terms of the evaluation criteria, the statement receives a complexity score of 3, an accuracy score of 7, and a potential risk score of 3. This result again supports the finding of case 1, as the increase in the complexity score reduces the accuracy while increasing the potential for harm.</p>
            </sec>
            <sec id="sec13">
                <title>Case 3: 35-year-old female with BMI 34 also having PCOS to reduce weight</title>
                <p>The complexity of the question increases with the addition of the condition of PCOS. The diet provided is similar to that for the previous question, with the addition of extra guidelines for PCOS, which is general information. However, the diet provided is not specific to the condition, and a user following it may not achieve their weight loss target, but they are not at potential risk for harm. In terms of the evaluation criteria, the statement receives a complexity score of 4, an accuracy score of 6, and a potential risk score of 4. The result of this case also concurs with the hypothesis that added complexity in the question asked results in lower accuracy and a higher risk of harm.</p>
            </sec>
            <sec id="sec14">
                <title>Case 4: 40-year-old male with diabetes</title>
                <p>The question is complex due to the mention of diabetes, which requires consideration of many factors before preparing a diet chart. A simple statement of diabetes does not provide enough information, and the patient should be asked about the type of diabetes, medications, and recent blood reports. Calories, BMI, and current physical activity are critical considerations for a diabetic diet. The patient is at risk of developing hypoglycemia if they are on insulin and have a low BMI or high activity levels. A dietitian would consider all these factors while preparing a plan for a diabetic patient. In terms of the evaluation criteria, the statement receives a complexity score of 4, an accuracy score of 5, and a potential risk score of 6. The complexity score of four for an older patient has less accuracy and high potential of harm. Does age contribute to potential to harm? This question needs to be further tested empirically.</p>
            </sec>
            <sec id="sec15">
                <title>Case 5: 40-year-old male with diabetes and CKD</title>
                <p>The complexity of the question increases with the addition of chronic kidney disease (CKD), which requires consideration of several factors while preparing a diet chart, such as the stage of CKD and the current level of potassium and sodium in the blood. However, the statement receives a low accuracy score of 4 as the diet generated does not mention limiting the sodium intake to at least 1.5 g/day, which is essential for CKD patients. Additionally, the diet contains high sources of protein, 75-80 g, which is much higher than what is recommended for a CKD patient and not calculated as per patient weight and CKD level. As a result, a layperson following this diet may be at a high potential for risk, indicating a high potential for risk score of 8. In terms of the evaluation criteria, the statement receives a complexity score of 5, an accuracy score of 4, and a potential risk score of 8. Increasing complexity of the query from 4 to 5 increases the potential of risk from 6 to 8. This leads us to conclude that, with increasing complexity, potential harm increases exponentially after a point. This phenomenon needs to be further tested empirically.</p>
            </sec>
            <sec id="sec16">
                <title>Case 6: 40-year-old male with diabetes, hypertension, and CKD</title>
                <p>The complexity of the question increases with the addition of hypertension as a comorbidity. However, the diet chart provided is the same as the previous question, which does not pose much risk for diabetes and hypertension but poses all the risks previously mentioned for CKD. Therefore, the statement receives a low accuracy score of 4. Additionally, patients need to be educated about sugar and salt sources, and general guidelines are not enough. Measurements should be incorporated into the diet plan itself to avoid potential risks. As a result, the statement receives a high potential for risk score of 8. In terms of the evaluation criteria, the statement receives a complexity score of 6, an accuracy score of 4, and a potential risk score of 8. The finding of this case does not concur with the finding of case 5 that potential harm increases exponentially with complexity, as the potential risk remains the same despite the increased complexity.</p>
            </sec>
            <sec id="sec17">
                <title>Case 7: 40-year-old male with diabetes, hypertension, and CKD Indian with a gluten allergy</title>
                <p>The complexity of the question increases with the addition of gluten sensitivity. As a result, a proper dietitian is required to prepare a diet plan that takes into consideration the patient&#x2019;s multiple comorbidities and dietary restrictions. However, the statement is lacking in accuracy as it does not provide any specific information on how to prepare a diet plan for a person with these conditions. Therefore, the accuracy score is low at 4. Additionally, without a specific diet plan, there is potential for risk for the patient with so many comorbidities and dietary restrictions. As a result, the statement receives a high potential for risk score of 8. In terms of the evaluation criteria, the statement receives a complexity score of 7, an accuracy score of 4, and a potential risk score of 8. The findings from this case also concur with the findings from the earlier cases. The increase in complexity is inversely proportional to accuracy while directly proportional to the potential risk.</p>
            </sec>
            <sec id="sec18">
                <title>Case 8: 30-year-old female height 150 cm weight 80 kg, having PCOS, hypothyroidism, insulin resistance with gluten sensitivity, HbA1c 6% for weight loss</title>
                <p>This question is extraordinarily complex with multiple parameters given, including the condition of hypothyroidism. The ideal diet for this patient is a low-carb, high-protein, anti-inflammatory diet, with the need to avoid goitrogenic foods like soy products. However, the accuracy of the given information is relatively low, and there is still a potential risk for the patient if not properly guided by a qualified dietitian. Overall scores are as follows: Complexity - 8, Accuracy - 3, Potential for Risk - 6. This study examines the case of highest complexity and hence minimum accuracy. The risk score for this study was expected to be highest but that is not the case. This finding does not concur with the findings from the earlier seven cases.</p>
                <p>The summary of the analysis of eight cases is listed in 
                    <xref ref-type="table" rid="T1">Table 1</xref>.</p>
                <table-wrap id="T1" orientation="portrait" position="float">
                    <label>Table 1. </label>
                    <caption>
                        <title>Summary of the illustrative case study analysis.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">Case number</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Complexity</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Accuracy</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Potential to harm</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Case 1</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">2</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">8</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">1</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Case 2</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">3</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">7</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">3</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Case 3</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">4</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">6</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">4</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Case 4</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">4</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">5</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">6</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Case 5</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">5</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">4</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">8</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Case 6</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">6</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">4</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">8</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Case 7</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">7</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">4</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">8</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Case 8</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">8</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">3</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">6</td>
                            </tr>
                        </tbody>
                    </table>
                    <table-wrap-foot>
                        <p>Source: Author&#x2019;s Compilation.</p>
                    </table-wrap-foot>
                </table-wrap>
                <p>As depicted in 
                    <xref ref-type="fig" rid="f2">Figure 2</xref>, as the complexity of the scenario increases, the risk potential also increases. That suggests that ChatGPT should be avoided for complex medical conditions/scenarios. The researchers believe that accuracy does not change much with an increase in complexity; this needs to be further evaluated empirically.</p>
                <fig fig-type="figure" id="f2" orientation="portrait" position="float">
                    <label>Figure 2. </label>
                    <caption>
                        <title>Summary of the case analysis.</title>
                    </caption>
                    <graphic id="gr2" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/155982/efcf7f3f-2289-403c-9956-46ec075c72ba_figure2.gif"/>
                </fig>
                <p>The findings of the study are supported by Johnson 
                    <italic toggle="yes">et al.</italic> (2023). They observed that ChatGPT can produce accurate information in response to diverse medical queries, as judged by academic physician specialists, although with important limitations. Further research and model development are needed to correct inaccuracies and for validation.
                    <sup>
                        <xref ref-type="bibr" rid="ref20">20</xref>
                    </sup> Another group of researchers found that ChatGPT provides medical information of comparable quality to available static internet information.
                    <sup>
                        <xref ref-type="bibr" rid="ref21">21</xref>
                    </sup> Another recent study suggests a cautious approach to the use of ChatGPT in clinical practice. They lament that it does not provide references for the information and hence is not reliable for clinical use. Thus, the findings of this study also suggest the cautious use of ChatGPT in medical nutrition therapy, as irresponsible use has the potential to harm the user. The study assessing the accuracy and potential risks of using nutrition therapy information provided by ChatGPT, evaluated by nutritionists and a group of experts, has several limitations that must be considered when interpreting its results. ChatGPT&#x2019;s responses are based on the information available up to its last training data, which might not include the latest research or updated guidelines in nutrition therapy. This time-lag in information can introduce a bias towards outdated practices or missing new evidence-based approaches. The accuracy and risk assessments made by the nutritionists and experts are subjective and can vary based on their individual experiences, knowledge, and biases. This variability can introduce both direction and magnitude biases in the evaluation process. The experts and nutritionists might have preconceived notions about the reliability of AI-generated information, which could influence their assessment of ChatGPT&#x2019;s responses, either positively or negatively. The range and type of nutrition therapy questions asked may not comprehensively cover the vast field of nutrition. Thus, the study&#x2019;s findings might not be generalizable across all areas of nutrition therapy.</p>
            </sec>
        </sec>
        <sec id="sec19" sec-type="conclusion">
            <title>Conclusion</title>
            <p>The primary objective of the present study was to assess the accuracy and comprehensiveness of ChatGPT&#x2019;s responses to nutritional queries generated by nutritionists/dieticians. To achieve this, an in-depth case study approach was employed. Functional testing was conducted by creating test cases that aligned with the functional requirements of the software application. ChatGPT&#x2019;s responses were evaluated and analyzed in different scenarios that involved medical nutritional therapy, varying in complexity. The accuracy of the generated data was assessed by a registered nutritionist, and a potential harm score was used to evaluate the responses provided by ChatGPT.</p>
            <p>When several case scenarios with varying levels of complexity were evaluated for their risk potential, it was demonstrated that as the complexity increased, so did the potential risk. The study suggests that ChatGPT should not be used for complex medical nutrition situations and conditions, even though the accuracy of the generated response does not change much with the complexity of the case scenario.</p>
            <p>The study&#x2019;s findings have important clinical implications for practitioners, particularly nutritionists and dieticians, who may use ChatGPT or similar AI-powered tools in their practice. Practitioners should exercise caution and avoid relying solely on ChatGPT for complex cases that require specialized knowledge and expertise.</p>
            <p>The study&#x2019;s findings underscore the importance of using ChatGPT or similar AI-powered tools appropriately in clinical practice. It should not be used as a replacement for professional judgment or clinical decision-making, particularly in complex medical nutrition situations. Practitioners, especially nutritionists and dietitians, should consider ChatGPT as a complementary tool to support their clinical practice, and not solely rely on it for making critical nutrition-related decisions. This study emphasizes the importance of human verification and not solely relying on AI-generated information.</p>
            <p>The findings of the study have important implications for policymakers. One key recommendation is to exercise caution when implementing generative AI, such as ChatGPT, in clinical practice. Rushing to adopt such tools without thorough evaluation and validation may not be advisable. While generative AI has the potential to improve efficiency in healthcare operations, it should be considered as a decision support system for registered practitioners, rather than a standalone tool for making clinical decisions.</p>
            <p>It is important to note that patients should not rely solely on generative AI for self-medication or medical nutrition therapy, especially in situations where multiple health conditions (comorbidities) are involved. This is because generative AI tools like ChatGPT may not have the ability to fully assess and address the complexities of comorbid conditions, which could potentially result in harm to patients.
                <sup>
                    <xref ref-type="bibr" rid="ref22">22</xref>
                </sup>
            </p>
            <p>In conclusion, a collaborative effort involving all stakeholders in healthcare education, research, and practice is urgently needed to establish guidelines for the responsible use of ChatGPT by educators, researchers, and practitioners.</p>
            <sec id="sec20">
                <title>Limitations of the study</title>
                <p>The study used a small sample size which could affect the accuracy of the results. Another limitation is the dynamic nature of technology. Since technology is constantly evolving and improving, the results of the study may need to be reevaluated after a few days or weeks to account for any changes or updates. Additionally, the study&#x2019;s reliance on only one nutritionist to assess accuracy introduces the possibility of bias and human errors.</p>
            </sec>
        </sec>
    </body>
    <back>
        <sec id="sec23" sec-type="data-availability">
            <title>Data availability</title>
            <p>Figshare: Evaluation of accuracy and potential harm of ChatGPT in medical nutrition therapy &#x2013; a case-based approach. 
                <ext-link ext-link-type="uri" xlink:href="https://dx.doi.org/10.6084/m9.figshare.24547276">https://dx.doi.org/10.6084/m9.figshare.24547276</ext-link>.</p>
            <p>Data are available under the terms of the 
                <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution 4.0 International license</ext-link> (CC-BY 4.0).</p>
        </sec>
        <ack>
            <title>Acknowledgments</title>
            <p>The authors of this study are grateful to Datta Meghe Institute of Higher Education &amp; Research, Gulf Medical University, and Thumbay University Hospital for the infrastructural support provided for completion of this research work.</p>
        </ack>
        <ref-list>
            <title>References</title>
            <ref id="ref1">
                <label>1</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Drozd</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Pujades-Rodriguez</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Lillie</surname>
                            <given-names>PJ</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Non-communicable disease, sociodemographic factors, and risk of death from infection: a UK Biobank observational cohort study.</article-title>
                    <source>

                        <italic toggle="yes">Lancet Infect. Dis.</italic>
</source>
                    <year>2021</year>;<volume>21</volume>(<issue>8</issue>):<fpage>1184</fpage>&#x2013;<lpage>1191</lpage>.
                    <pub-id pub-id-type="pmid">33662324</pub-id>
                    <pub-id pub-id-type="doi">10.1016/S1473-3099(20)30978-6</pub-id>
                    <pub-id pub-id-type="pmcid">PMC8323124</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref2">
                <label>2</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Stefano</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Marco</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Daniela</surname>
                            <given-names>C</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Nutritional knowledge of nursing students: A systematic literature review.</article-title>
                    <source>

                        <italic toggle="yes">Nurse Educ. Today.</italic>
</source>
                    <year>2023</year>;<fpage>105826</fpage>.</mixed-citation>
            </ref>
            <ref id="ref3">
                <label>3</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Magliano</surname>
                            <given-names>DJ</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Boyko</surname>
                            <given-names>EJ</given-names>
                        </name>
</person-group>:
                    <article-title>IDF diabetes atlas.</article-title>
                    <year>2022</year>.</mixed-citation>
            </ref>
            <ref id="ref4">
                <label>4</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Biswas</surname>
                            <given-names>SS</given-names>
                        </name>
</person-group>:
                    <article-title>Role of ChatGPT in public health.</article-title>
                    <source>

                        <italic toggle="yes">Ann. Biomed. Eng.</italic>
</source>
                    <year>2023</year>;<volume>51</volume>(<issue>5</issue>):<fpage>868</fpage>&#x2013;<lpage>869</lpage>.
                    <pub-id pub-id-type="pmid">36920578</pub-id>
                    <pub-id pub-id-type="doi">10.1007/s10439-023-03172-7</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref5">
                <label>5</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Shen</surname>
                            <given-names>Y</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Heacock</surname>
                            <given-names>L</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Elias</surname>
                            <given-names>J</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>ChatGPT and other large language models are double-edged swords.</article-title>
                    <source>

                        <italic toggle="yes">Radiology.</italic>
</source>
                    <year>2023</year>;<volume>307</volume>(<issue>2</issue>):<fpage>e230163</fpage>.
                    <pub-id pub-id-type="pmid">36700838</pub-id>
                    <pub-id pub-id-type="doi">10.1148/radiol.230163</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref6">
                <label>6</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Shen</surname>
                            <given-names>Y</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Heacock</surname>
                            <given-names>L</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Elias</surname>
                            <given-names>J</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>ChatGPT and other large language models are double-edged swords.</article-title>
                    <source>

                        <italic toggle="yes">Radiology.</italic>
</source>
                    <year>2023</year>;<volume>307</volume>(<issue>2</issue>):<fpage>e230163</fpage>.
                    <pub-id pub-id-type="pmid">36700838</pub-id>
                    <pub-id pub-id-type="doi">10.1148/radiol.230163</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref7">
                <label>7</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Jaques</surname>
                            <given-names>N</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ghandeharioun</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Shen</surname>
                            <given-names>JH</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Way off-policy batch deep reinforcement learning of implicit human preferences in dialog.</article-title>
                    <source>

                        <italic toggle="yes">arXiv preprint arXiv:1907.00456.</italic>
</source>
                    <year>2019</year>.</mixed-citation>
            </ref>
            <ref id="ref8">
                <label>8</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Vaira</surname>
                            <given-names>LA</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Lechien</surname>
                            <given-names>JR</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Abbate</surname>
                            <given-names>V</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Accuracy of ChatGPT-Generated Information on Head and Neck and Oromaxillofacial Surgery: A Multicenter Collaborative Analysis.</article-title>
                    <source>

                        <italic toggle="yes">Otolaryngol. Head Neck Surg.</italic>
</source>
                    <year>2023</year>.
                    <pub-id pub-id-type="pmid">37595113</pub-id>
                    <pub-id pub-id-type="doi">10.1002/ohn.489</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref9">
                <label>9</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Hosseini</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Rasmussen</surname>
                            <given-names>LM</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Resnik</surname>
                            <given-names>DB</given-names>
                        </name>
</person-group>:
                    <article-title>Using AI to write scholarly publications.</article-title>
                    <source>

                        <italic toggle="yes">Account. Res.</italic>
</source>
                    <year>2023</year>;<fpage>1</fpage>&#x2013;<lpage>9</lpage>.
                    <pub-id pub-id-type="doi">10.1080/08989621.2023.2168535</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref10">
                <label>10</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Thorp</surname>
                            <given-names>HH</given-names>
                        </name>
</person-group>:
                    <article-title>ChatGPT is fun, but not an author.</article-title>
                    <source>

                        <italic toggle="yes">Science.</italic>
</source>
                    <year>2023</year>;<volume>379</volume>(<issue>6630</issue>):<fpage>313</fpage>&#x2013;<lpage>313</lpage>.
                    <pub-id pub-id-type="pmid">36701446</pub-id>
                    <pub-id pub-id-type="doi">10.1126/science.adg7879</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref11">
                <label>11</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Shah</surname>
                            <given-names>FA</given-names>
                        </name>
</person-group>:
                    <article-title>IS Chat-GPT A Silver Bullet for Scientific Manuscript Writing?</article-title>
                    <source>

                        <italic toggle="yes">J. Postgrad. Med. Inst.</italic>
</source>
                    <year>2023</year>;<volume>37</volume>(<issue>1</issue>):<fpage>1</fpage>&#x2013;<lpage>2</lpage>.</mixed-citation>
            </ref>
            <ref id="ref12">
                <label>12</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Flanagin</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Bibbins-Domingo</surname>
                            <given-names>K</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Berkwits</surname>
                            <given-names>M</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Nonhuman &#x201c;authors&#x201d; and implications for the integrity of scientific publication and medical knowledge.</article-title>
                    <source>

                        <italic toggle="yes">JAMA.</italic>
</source>
                    <year>2023</year>;<volume>329</volume>(<issue>8</issue>):<fpage>637</fpage>&#x2013;<lpage>639</lpage>.
                    <pub-id pub-id-type="pmid">36719674</pub-id>
                    <pub-id pub-id-type="doi">10.1001/jama.2023.1344</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref13">
                <label>13</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Goodman</surname>
                            <given-names>RS</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Patrinely</surname>
                            <given-names>JR</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Osterman</surname>
                            <given-names>T</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>On the cusp: Considering the impact of artificial intelligence language models in healthcare.</article-title>
                    <source>

                        <italic toggle="yes">Med.</italic>
</source>
                    <year>2023</year>;<volume>4</volume>(<issue>3</issue>):<fpage>139</fpage>&#x2013;<lpage>140</lpage>.
                    <pub-id pub-id-type="pmid">36905924</pub-id>
                    <pub-id pub-id-type="doi">10.1016/j.medj.2023.02.008</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref14">
                <label>14</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Yin</surname>
                            <given-names>RK</given-names>
                        </name>
</person-group>:
                    <article-title>The case study method as a tool for doing evaluation.</article-title>
                    <source>

                        <italic toggle="yes">Curr. Sociol.</italic>
</source>
                    <year>1992</year>;<volume>40</volume>(<issue>1</issue>):<fpage>121</fpage>&#x2013;<lpage>137</lpage>.
                    <pub-id pub-id-type="doi">10.1177/001139292040001009</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref15">
                <label>15</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Heaton</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Day</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Britten</surname>
                            <given-names>N</given-names>
                        </name>
</person-group>:
                    <article-title>Collaborative research and the co-production of knowledge for practice: an illustrative case study.</article-title>
                    <source>

                        <italic toggle="yes">Implement. Sci.</italic>
</source>
                    <year>2016</year>;<volume>11</volume>:<fpage>1</fpage>&#x2013;<lpage>10</lpage>.
                    <pub-id pub-id-type="doi">10.1186/s13012-016-0383-9</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref16">
                <label>16</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Chapman</surname>
                            <given-names>B</given-names>
                        </name>

                        <name name-style="western">
                            <surname>MacLaurin</surname>
                            <given-names>T</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Powell</surname>
                            <given-names>D</given-names>
                        </name>
</person-group>:
                    <article-title>Food safety info sheets: Design and refinement of a narrative-based training intervention.</article-title>
                    <source>

                        <italic toggle="yes">Br. Food J.</italic>
</source>
                    <year>2011</year>;<volume>113</volume>(<issue>2</issue>):<fpage>160</fpage>&#x2013;<lpage>186</lpage>.
                    <pub-id pub-id-type="doi">10.1108/00070701111105286</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref17">
                <label>17</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Boren</surname>
                            <given-names>SA</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Clarke</surname>
                            <given-names>WL</given-names>
                        </name>
</person-group>:
                    <article-title>Analytical and clinical performance of blood glucose monitors.</article-title>
                    <source>

                        <italic toggle="yes">J. Diabetes Sci. Technol.</italic>
</source>
                    <year>2010</year>;<volume>4</volume>(<issue>1</issue>):<fpage>84</fpage>&#x2013;<lpage>97</lpage>.
                    <pub-id pub-id-type="pmid">20167171</pub-id>
                    <pub-id pub-id-type="doi">10.1177/193229681000400111</pub-id>
                    <pub-id pub-id-type="pmcid">PMC2825628</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref18">
                <label>18</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Mezzich</surname>
                            <given-names>JE</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Salloum</surname>
                            <given-names>IM</given-names>
                        </name>
</person-group>:
                    <article-title>Clinical complexity and person-centered integrative diagnosis.</article-title>
                    <source>

                        <italic toggle="yes">World Psychiatry.</italic>
</source>
                    <year>2008</year>;<volume>7</volume>(<issue>1</issue>):<fpage>1</fpage>&#x2013;<lpage>2</lpage>.
                    <pub-id pub-id-type="pmid">18458769</pub-id>
                    <pub-id pub-id-type="doi">10.1002/j.2051-5545.2008.tb00138.x</pub-id>
                    <pub-id pub-id-type="pmcid">PMC2327227</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref19">
                <label>19</label>
                <mixed-citation publication-type="other">
                    <article-title>Guideline: Harm and risk in research - University College Dublin.</article-title>
                    <year>[cited 2023 Apr 25]</year>.
                    <ext-link ext-link-type="uri" xlink:href="https://www.ucd.ie/researchethics/t4media/HRECG3/Harm/and/Risk/in/Research//-/140921.pdf">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref20">
                <label>20</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Johnson</surname>
                            <given-names>D</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Goodman</surname>
                            <given-names>R</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Patrinely</surname>
                            <given-names>J</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Assessing the accuracy and reliability of AI-generated medical responses: an evaluation of the Chat-GPT model.</article-title>
                    <source>

                        <italic toggle="yes">Research Square.</italic>
</source>
                    <year>2023</year>.</mixed-citation>
            </ref>
            <ref id="ref21">
                <label>21</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Walker</surname>
                            <given-names>HL</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ghani</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Kuemmerli</surname>
                            <given-names>C</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Reliability of medical information provided by ChatGPT: assessment against clinical guidelines and patient information quality instrument.</article-title>
                    <source>

                        <italic toggle="yes">J. Med. Internet Res.</italic>
</source>
                    <year>2023</year>;<volume>25</volume>:<fpage>e47479</fpage>.
                    <pub-id pub-id-type="pmid">37389908</pub-id>
                    <pub-id pub-id-type="doi">10.2196/47479</pub-id>
                    <pub-id pub-id-type="pmcid">PMC10365578</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref22">
                <label>22</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Whiles</surname>
                            <given-names>BB</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Bird</surname>
                            <given-names>VG</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Canales</surname>
                            <given-names>BK</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Caution! AI bot has entered the patient chat: ChatGPT has limitations in providing accurate urologic healthcare advice.</article-title>
                    <source>

                        <italic toggle="yes">Urology.</italic>
</source>
                    <year>2023</year>;<volume>180</volume>:<fpage>278</fpage>&#x2013;<lpage>284</lpage>.
                    <pub-id pub-id-type="pmid">37467806</pub-id>
                    <pub-id pub-id-type="doi">10.1016/j.urology.2023.07.010</pub-id>
                </mixed-citation>
            </ref>
        </ref-list>
    </back>
    <sub-article article-type="reviewer-report" id="report258226">
        <front-stub>
            <article-id pub-id-type="doi">10.5256/f1000research.155982.r258226</article-id>
            <title-group>
                <article-title>Reviewer response for version 1</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author">
                    <name>
                        <surname>Podszun</surname>
                        <given-names>Maren C.</given-names>
                    </name>
                    <xref ref-type="aff" rid="r258226a1">1</xref>
                    <role>Referee</role>
                </contrib>
                <aff id="r258226a1">
                    <label>1</label>University of Hohenheim, Stuttgart, Germany</aff>
            </contrib-group>
            <author-notes>
                <fn fn-type="conflict">
                    <p>
                        <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>6</day>
                <month>5</month>
                <year>2024</year>
            </pub-date>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2024 Podszun MC</copyright-statement>
                <copyright-year>2024</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access peer review report distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <related-article ext-link-type="doi" id="relatedArticleReport258226" related-article-type="peer-reviewed-article" xlink:href="10.12688/f1000research.142428.1"/>
            <custom-meta-group>
                <custom-meta>
                    <meta-name>recommendation</meta-name>
                    <meta-value>reject</meta-value>
                </custom-meta>
            </custom-meta-group>
        </front-stub>
        <body>
            <p>The authors have investigated the accuracy of ChatGPT for medical nutrition therapy. They selected a case study-based approach with queries increasing in complexity and then a nutritionist evaluated the given output for accuracy. The output was furthermore scaled for potential of harm. While the topic is certainly a hot one and very important there are some shortcomings in the current version that need to be addressed.</p>
            <p> &#x00a0; 
                <list list-type="bullet">
                    <list-item>
                        <p>One diet plan per condition is too little to make any inference about the accuracy, please consult a statistician to calculate the number of plans needed for a sound statistical analysis.</p>
                    </list-item>
                    <list-item>
                        <p>Assessment for accuracy by one nutritionist is too little; I would suggest adding at least two others who are blinded to the previous answers. The rationale for the number of experts is weak. It&#x2019;s further confusing why public health professionals were chosen and not nutritionists.</p>
                    </list-item>
                    <list-item>
                        <p>Please indicate the version of ChatGPT. It is a tremendous difference whether ChatGPT-3.5 or 4 was used, as ChatGPT4 is connected to the internet and will have different output. Please also indicate the time (date) of data collection</p>
                    </list-item>
                    <list-item>
                        <p>The exact prompts used for the cases need to be given in the method section and not just the supplementary data</p>
                    </list-item>
                    <list-item>
                        <p>The manuscript would benefit from English language/ grammar service to improve clarity</p>
                    </list-item>
                </list>
            </p>
            <p>Is the work clearly and accurately presented and does it cite the current literature?</p>
            <p>Partly</p>
            <p>If applicable, is the statistical analysis and its interpretation appropriate?</p>
            <p>No</p>
            <p>Are all the source data underlying the results available to ensure full reproducibility?</p>
            <p>Yes</p>
            <p>Is the study design appropriate and is the work technically sound?</p>
            <p>No</p>
            <p>Are the conclusions drawn adequately supported by the results?</p>
            <p>Partly</p>
            <p>Are sufficient details of methods and analysis provided to allow replication by others?</p>
            <p>Yes</p>
            <p>Reviewer Expertise:</p>
            <p>Nutrition Science, MASLD, AI in nutrition</p>
            <p>I confirm that I have read this submission and believe that I have an appropriate level of expertise to state that I do not consider it to be of an acceptable scientific standard, for reasons outlined above.</p>
        </body>
    </sub-article>
    <sub-article article-type="reviewer-report" id="report265582">
        <front-stub>
            <article-id pub-id-type="doi">10.5256/f1000research.155982.r265582</article-id>
            <title-group>
                <article-title>Reviewer response for version 1</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author">
                    <name>
                        <surname>Kirk</surname>
                        <given-names>Daniel</given-names>
                    </name>
                    <xref ref-type="aff" rid="r265582a1">1</xref>
                    <xref ref-type="aff" rid="r265582a2">2</xref>
                    <role>Referee</role>
                    <uri content-type="orcid">https://orcid.org/0000-0001-7738-7686</uri>
                </contrib>
                <aff id="r265582a1">
                    <label>1</label>Wageningen University &amp; Research, Wageningen, Gelderland, The Netherlands</aff>
                <aff id="r265582a2">
                    <label>2</label>Department of Twin Research &amp; Genetic Epidemiology, King's College London (Ringgold ID: 4616), London, England, UK</aff>
            </contrib-group>
            <author-notes>
                <fn fn-type="conflict">
                    <p>
                        <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>25</day>
                <month>4</month>
                <year>2024</year>
            </pub-date>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2024 Kirk D</copyright-statement>
                <copyright-year>2024</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access peer review report distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <related-article ext-link-type="doi" id="relatedArticleReport265582" related-article-type="peer-reviewed-article" xlink:href="10.12688/f1000research.142428.1"/>
            <custom-meta-group>
                <custom-meta>
                    <meta-name>recommendation</meta-name>
                    <meta-value>approve-with-reservations</meta-value>
                </custom-meta>
            </custom-meta-group>
        </front-stub>
        <body>
            <p>As more people rely on ChatGPT as a source of information, the authors aim to evaluate the competency of ChatGPT at answering nutrition questions asked by a nutritionist using a case-study approach. The approach is interesting and the topic of chatbots for managing chronic diseases is important and highly relevant.</p>
            <p> &#x00a0; 
                <list list-type="bullet">
                    <list-item>
                        <p>&#x201c;The major categories of NCDs are known as chronic diseases,&#x201d; this is repetitive and unnecessary given the first sentence. I would just say &#x201c;The major categories of NCDs are&#x2026;.&#x201d;</p>
                    </list-item>
                    <list-item>
                        <p>&#x201c;Chat GPT&#x201d; should be corrected to ChatGPT (without a space between)</p>
                    </list-item>
                    <list-item>
                        <p>We have previously published an article that compared the quality of answers to nutrition questions between ChatGPT and human dietitians (Ref [1]). In our study, we found ChatGPT performed well on all metrics but, importantly, we excluded medical questions. Given that the authors find that accuracy was not compromised but risk potential was higher with increasing complexity, this is interesting. Discussing these findings in the context of their own would enrich the authors&#x2019; article.</p>
                    </list-item>
                    <list-item>
                        <p>At the end of the intro, the authors state that the questions were asked by &#x201c;Nutritionist/Dietician&#x201d;. First, &#x201c;a&#x201d; is missing before this (or the noun should be pluralized). Second, nutritionists and dietitians are similar but different, with the conditions for naming oneself a dietitian being more stringent. The authors should specify which they chose here.</p>
                    </list-item>
                    <list-item>
                        <p>&#x201c;The approach used in this study is borrowed from functional testing and quality Assurance practices in software development.&#x201d; There should be citations here for those that are not from a software development background.</p>
                    </list-item>
                    <list-item>
                        <p>&#x201c;The researchers wanted to recruit five to nine experts as a number greater than that if difficult to handle and a number less than that may result in bias.&#x201d; We chose a similar number of experts for similar reasons. This may be used in support of the authors&#x2019; approach.</p>
                    </list-item>
                    <list-item>
                        <p>&#x201c;Step 3: The responses from Step 2 were evaluated by a registered nutritionist for accuracy.&#x201d; This represents one of the most vulnerable points of the authors&#x2019; study. What constitutes a &#x201c;good&#x201d;/correct answer in the field of nutrition can be (unfortunately) quite subjective. Topics in nutrition can be polarized and a nutritionist&#x2019;s interpretation of the science can be disproportionately influenced by their own experience. Since the scoring of ChatGPT&#x2019;s answers was performed by only one individual, the authors&#x2019; results become subject to the knowledge and beliefs of a single individual. How do the authors justify their methodology in spite of this?</p>
                    </list-item>
                    <list-item>
                        <p>The prompts given to, and responses from, ChatGPT should be made available.</p>
                    </list-item>
                    <list-item>
                        <p>I think the research would benefit from having some type of a control group (i.e., scores of answers from human experts). At present, it cannot be discounted that questions of increasing complexity naturally lead to higher potential of harm. In this case, this would not be a limitation of ChatGPT but rather a function of complicated questions. However, since there is no control group, this cannot be known.</p>
                    </list-item>
                    <list-item>
                        <p>There is insufficient detail in the discussion of the results. The authors mention the results of similar studies but do not contextualise their own findings in these others.</p>
                    </list-item>
                    <list-item>
                        <p>The authors do well in the introduction to set the scene for the motivation of research on chatbots for managing chronic diseases but then do not elaborate further on this when discussing their own results. The authors mention what the findings mean for policymakers and practitioners but it would be worth discussing what these findings mean for individuals with chronic diseases who might wish to use ChatGPT for obtaining information and what they mean for the future of chatbots in a medical context.</p>
                    </list-item>
                </list>
            </p>
            <p>Is the work clearly and accurately presented and does it cite the current literature?</p>
            <p>Partly</p>
            <p>If applicable, is the statistical analysis and its interpretation appropriate?</p>
            <p>Not applicable</p>
            <p>Are all the source data underlying the results available to ensure full reproducibility?</p>
            <p>No</p>
            <p>Is the study design appropriate and is the work technically sound?</p>
            <p>Yes</p>
            <p>Are the conclusions drawn adequately supported by the results?</p>
            <p>Partly</p>
            <p>Are sufficient details of methods and analysis provided to allow replication by others?</p>
            <p>Partly</p>
            <p>Reviewer Expertise:</p>
            <p>nutrition, machine learning, biochemistry</p>
            <p>I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard, however I have significant reservations, as outlined above.</p>
        </body>
        <back>
            <ref-list>
                <title>References</title>
                <ref id="rep-ref-265582-1">
                    <label>1</label>
                    <mixed-citation publication-type="journal">
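                        <person-group person-group-type="author">
                            <name name-style="western">
                                <surname>Kirk</surname>
                                <given-names>D</given-names>
                            </name>
                            <name name-style="western">
                                <surname>van Eijnatten</surname>
                                <given-names>E</given-names>
                            </name>
                            <name name-style="western">
                                <surname>Camps</surname>
                                <given-names>G</given-names>
                            </name>
                        </person-group>: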
                        <article-title>Comparison of Answers between ChatGPT and Human Dieticians to Common Nutrition Questions.</article-title>
                        <source>
                            <italic>J Nutr Metab</italic>
                        </source>.<year>2023</year>;<volume>2023</volume>:
                        <elocation-id>5548684</elocation-id>
                        <pub-id pub-id-type="pmid">38025546</pub-id>
                        <pub-id pub-id-type="doi">10.1155/2023/5548684</pub-id>
                    </mixed-citation>
                </ref>
            </ref-list>
        </back>
    </sub-article>
</article>
