<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.2 20190208//EN" "http://jats.nlm.nih.gov/publishing/1.2/JATS-journalpublishing1.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="methods-article" dtd-version="1.2" xml:lang="en">
    <front>
        <journal-meta>
            <journal-id journal-id-type="pmc">F1000Research</journal-id>
            <journal-title-group>
                <journal-title>F1000Research</journal-title>
            </journal-title-group>
            <issn pub-type="epub">2046-1402</issn>
            <publisher>
                <publisher-name>F1000 Research Limited</publisher-name>
                <publisher-loc>London, UK</publisher-loc>
            </publisher>
        </journal-meta>
        <article-meta>
            <article-id pub-id-type="doi">10.12688/f1000research.173421.1</article-id>
            <article-categories>
                <subj-group subj-group-type="heading">
                    <subject>Method Article</subject>
                </subj-group>
                <subj-group>
                    <subject>Articles</subject>
                </subj-group>
            </article-categories>
            <title-group>
                <article-title>Ten Tips for AI&#x2011;Assisted Key Feature Problems: A Validity&#x2011;Informed Guide for Medical Education</article-title>
                <fn-group content-type="pub-status">
                    <fn>
                        <p>[version 1; peer review: awaiting peer review]</p>
                    </fn>
                </fn-group>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Zafar</surname>
                        <given-names>Imran</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Conceptualization</role>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <role content-type="http://credit.niso.org/">Visualization</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Original Draft Preparation</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a1">1</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Farooq</surname>
                        <given-names>Munawar</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <uri content-type="orcid">https://orcid.org/0009-0009-2537-7115</uri>
                    <xref ref-type="aff" rid="a1">1</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Caliskan</surname>
                        <given-names>S. Ayhan</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a1">1</xref>
                </contrib>
                <contrib contrib-type="author" corresp="yes">
                    <name>
                        <surname>Magzoub</surname>
                        <given-names>Mohi Eldin</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Conceptualization</role>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <role content-type="http://credit.niso.org/">Project Administration</role>
                    <role content-type="http://credit.niso.org/">Supervision</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Original Draft Preparation</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <uri content-type="orcid">https://orcid.org/0000-0002-6721-4500</uri>
                    <xref ref-type="corresp" rid="c1">a</xref>
                    <xref ref-type="aff" rid="a1">1</xref>
                </contrib>
                <aff id="a1">
                    <label>1</label>Department of Medical Education, United Arab Emirates University College of Medicine and Health Sciences, Al Ain, Abu Dhabi, 20004, United Arab Emirates</aff>
            </contrib-group>
            <author-notes>
                <corresp id="c1">
                    <label>a</label>
                    <email xlink:href="mailto:mmagzoub@uaeu.ac.ae">mmagzoub@uaeu.ac.ae</email>
                </corresp>
                <fn fn-type="conflict">
                    <p>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>24</day>
                <month>12</month>
                <year>2025</year>
            </pub-date>
            <pub-date pub-type="collection">
                <year>2025</year>
            </pub-date>
            <volume>14</volume>
            <elocation-id>1446</elocation-id>
            <history>
                <date date-type="accepted">
                    <day>8</day>
                    <month>12</month>
                    <year>2025</year>
                </date>
            </history>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2025 Zafar I et al.</copyright-statement>
                <copyright-year>2025</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access article distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <self-uri content-type="pdf" xlink:href="https://f1000research.com/articles/14-1446/pdf"/>
            <abstract>
                <p>Generative artificial intelligence (AI) can augment educators&#x2019; capacity to design high-quality Key Feature Problems (KFPs) for valid assessment of clinical reasoning and decision-making. This practice-oriented guide presents ten evidence-informed tips for using AI to develop KFPs that are aligned with learning outcomes, cognitively demanding, and contextually authentic. Drawing on the KFP literature and contemporary validity frameworks (content, cognitive and response processes, internal structure, and consequences), we synthesize practical strategies for translating outcomes into key features, constructing realistic vignettes, creating parallel case variants, targeting higher-order thinking, ensuring curricular alignment and learner-level appropriateness, diversifying complementary item formats, validating AI-assisted items through a stepwise workflow, delivering decision-specific feedback, iterating from learner performance data, and safeguarding equity, ethics, and governance. We illustrate these recommendations with concise examples and an adapted validation workflow that supports both formative and summative applications. Although AI can accelerate scenario construction and feedback drafting, human expertise remains essential to verify clinical accuracy, prevent bias and hallucinations, calibrate difficulty, and preserve assessment security. With transparent processes and expert review, AI can serve as a collaborative assistant rather than a replacement, helping medical educators build rigorous KFPs that enhance the assessment of clinical decision-making.</p>
            </abstract>
            <kwd-group kwd-group-type="author">
                <kwd>Medical Education</kwd>
                <kwd>Generative Artificial Intelligence</kwd>
                <kwd>Key Feature Problems</kwd>
            </kwd-group>
            <funding-group>
                <funding-statement>The author(s) declared that no grants were involved in supporting this work.</funding-statement>
            </funding-group>
        </article-meta>
    </front>
    <body>
        <sec id="sec1">
            <title>Background</title>
            <p>
Key Feature Problems (KFPs) are an established assessment tool in medical education, designed to evaluate clinical decision-making skills among medical students and practitioners. KFPs focus on the &#x201c;key features&#x201d; of a clinical case, those critical steps or decisions that are most essential to managing the clinical case scenario effectively (
                <xref ref-type="bibr" rid="ref28">Page et al., 1995</xref>). By concentrating on these pivotal elements, KFPs offer a focused and efficient means of assessing learners&#x2019; clinical decisions in context, thereby bridging the gap between theoretical knowledge and practical application (
                <xref ref-type="bibr" rid="ref11">Farmer &amp; Page, 2005</xref>).</p>
            <p>Incorporating KFPs into medical education supports the integration of foundational scientific knowledge with clinical practice. The application of basic science principles within clinical reasoning is fundamental to competent medical decision-making. KFPs facilitate this integration by requiring learners to apply their understanding of underlying scientific mechanisms when evaluating clinical scenarios (
                <xref ref-type="bibr" rid="ref11">Farmer &amp; Page, 2005</xref>; 
                <xref ref-type="bibr" rid="ref27">Nayer et al., 2018</xref>). This alignment ensures that students are not only acquiring factual knowledge but are also developing the capacity to apply that knowledge in nuanced, real-world clinical situations.</p>
            <p>Moreover, the growing emphasis on clinical reasoning and self-directed learning in contemporary medical curricula underscores the relevance of KFPs. As assessment tools, KFPs are well-suited to evaluating higher-order thinking skills and have demonstrated reliability and validity in this domain (
                <xref ref-type="bibr" rid="ref11">Farmer &amp; Page, 2005</xref>). By simulating authentic clinical decisions, KFPs support the development of critical thinking, promote problem-solving, and prepare learners to handle clinical complexity with confidence (
                <xref ref-type="bibr" rid="ref11">Farmer &amp; Page, 2005</xref>; 
                <xref ref-type="bibr" rid="ref27">Nayer et al., 2018</xref>).</p>
        </sec>
        <sec id="sec2">
            <title>Challenges and principles in designing Key Feature Problems (KFPs)</title>
            <p>Developing high-quality Key Feature Problem (KFP) questions presents several challenges, particularly in ensuring clinical accuracy, curricular alignment, and educational relevance. Unlike traditional multiple-choice questions, KFPs aim to assess clinical decision-making skills through context-rich scenarios that mirror real-life practice, making their construction inherently complex (
                <xref ref-type="bibr" rid="ref27">Nayer et al., 2018</xref>).</p>
            <p>Clinical accuracy is essential for maintaining the quality and integrity of assessments. KFPs often span multiple disciplines and include nuanced decision points; therefore, any factual inaccuracies can compromise validity and undermine the assessment of decision-making skills. With the rapid evolution of medical knowledge, it is essential to regularly update KFP content to reflect current guidelines and best practices (
                <xref ref-type="bibr" rid="ref11">Farmer &amp; Page, 2005</xref>; 
                <xref ref-type="bibr" rid="ref27">Nayer et al., 2018</xref>).</p>
            <p>Effective KFP design requires alignment with clearly defined learning outcomes. Each scenario should target specific competencies expected of learners, thereby reinforcing curricular goals and ensuring that assessment remains educationally relevant (
                <xref ref-type="bibr" rid="ref27">Nayer et al., 2018</xref>). This is especially critical within the framework of competency-based medical education (CBME), where the emphasis is on demonstrable, practice-ready skills rather than rote memorization (
                <xref ref-type="bibr" rid="ref7">Connor et al., 2020</xref>). When strategically embedded across the curriculum, KFPs offer longitudinal reinforcement of essential clinical competencies, supporting both horizontal and vertical integration of knowledge.</p>
            <p>KFPs also offer opportunities to promote and evaluate ethical reasoning and professionalism. By incorporating patient-centered dilemmas or moral conflicts, KFPs can assess not only technical knowledge but also character formation and decision-making in ethically complex situations (
                <xref ref-type="bibr" rid="ref2">Andrade et al., 2024</xref>).</p>
            <p>Authenticity is a defining characteristic of effective KFPs. Scenarios should reflect real-world clinical contexts, be appropriately pitched to the learner&#x2019;s stage of training, and avoid cognitive overload. Appropriately scaffolded cases enhance engagement, reduce anxiety, and improve confidence (
                <xref ref-type="bibr" rid="ref27">Nayer et al., 2018</xref>). Emphasizing decision points related to diagnosis, management, and follow-up reinforces the transfer of knowledge to clinical settings (
                <xref ref-type="bibr" rid="ref15">Hrynchak et al., 2014</xref>).</p>
            <p>Finally, eliminating extraneous information is critical. Irrelevant details can distract learners from key issues, increase cognitive load, and hinder performance. Streamlined scenarios sharpen focus on the essential decisions, promoting efficient and accurate reasoning skills vital in high-stakes clinical environments. Well-crafted KFPs thus strike a balance between realism, challenge, and educational purpose, serving as a robust tool for developing and evaluating clinical reasoning throughout medical training (
                <xref ref-type="bibr" rid="ref27">Nayer et al., 2018</xref>).</p>
        </sec>
        <sec id="sec3">
            <title>Why AI for KFP now?</title>
            <p>Artificial intelligence is reshaping medical education through adaptive, data-informed tools that can strengthen both learning and assessment. Generative models, including large language models and simulation platforms, can rapidly produce realistic Key Feature Problems that align with explicit learning outcomes, match intended cognitive levels, and reflect authentic clinical contexts. This capacity accelerates the creation of item banks while supporting coherence with curricular blueprints and competency frameworks (
                <xref ref-type="bibr" rid="ref16">Indran et al., 2024</xref>; 
                <xref ref-type="bibr" rid="ref31">Qiu &amp; Liu, 2025</xref>).</p>
            <p>Beyond item drafting, AI enables innovations that are directly relevant to KFP design and use. Systems can generate virtual patients and interactive clinical vignettes that situate key decisions within believable settings, which promotes transfer of reasoning across variants and settings (
                <xref ref-type="bibr" rid="ref29">Potter &amp; Jefferies, 2024</xref>; 
                <xref ref-type="bibr" rid="ref34">Sardesai et al., 2024</xref>). AI-supported analytics can provide real-time or near-real-time feedback, surface common reasoning errors, and personalize practice based on learner performance patterns, thereby improving formative value and supporting programmatic assessment (
                <xref ref-type="bibr" rid="ref26">Mishra et al., 2024</xref>). Exposure to these tools also advances AI literacy, a competency that future clinicians increasingly require (
                <xref ref-type="bibr" rid="ref35">Subaveerapandiyan et al., 2024</xref>).</p>
            <p>Once deployed, AI can assist with continuous quality improvement of KFPs. Models can analyze response data to detect weak distractors, ambiguous wording, and miscalibrated difficulty, then propose targeted revisions for expert review. Where governance and privacy protections are in place, linkage to de-identified clinical data or the use of synthetic datasets can further enhance authenticity by anchoring scenarios in realistic patterns of presentation and management. However, such integrations require careful oversight by institutions and remain context-dependent (
                <xref ref-type="bibr" rid="ref5">Blau et al., 2024</xref>).</p>
            <p>These opportunities come with risks that must be actively managed. Generative systems can hallucinate facts, propagate outdated guidelines, and encode or amplify social and clinical biases. Responsible adoption, therefore, requires transparent processes, faculty development, and explicit ethical and data governance frameworks. Human subject matter expertise remains essential for verifying clinical accuracy, ensuring fairness, calibrating cognitive demand, and protecting item security (
                <xref ref-type="bibr" rid="ref13">Franco D&#x2019;Souza et al., 2024</xref>; 
                <xref ref-type="bibr" rid="ref37">Tolsgaard et al., 2023</xref>).</p>
            <p>In sum, strategic use of AI offers a scalable and evidence-informed approach to designing, validating, and iteratively improving KFPs. The following ten tips translate these opportunities and cautions into concrete steps that educators can apply to create high-quality, learner-appropriate, and ethically sound KFPs.</p>
        </sec>
        <sec id="sec4">
            <title>How we developed these tips</title>
            <p>We developed the ten tips through a staged process that combined theory, existing assessment standards, and iterative Subject-Matter Expert (SME) review. First, we mapped recurrent problems in AI-generated KFPs (construct drift, shallow recall, unsafe feedback, weak item documentation) against established assessment sources in medical education (key-feature literature, blueprinting and OSCE validation guidance, 
                <xref ref-type="bibr" rid="ref25">Messick-style (1995)</xref> validity argumentation). From this mapping we kept only frameworks that could be implemented in low- and medium-stakes contexts and that preserved the key-feature construct. Second, we used AI to draft multiple versions of each tip (purpose, action, example), then circulated these drafts to SMEs (assessment and clinical) to remove clinically unsafe suggestions, localize to practice in Gulf Cooperation Council (GCC) countries, and align with curriculum learning outcomes. Third, we trialed the tips on real AI outputs to see which ones actually improved item quality; tips that did not change SME ratings were merged or dropped. The final ten tips, therefore, represent the set that was (a) evidence-attuned, (b) feasible for routine faculty use, and (c) auditable through the adapted 5-step validation workflow.</p>
        </sec>
        <sec id="sec5">
            <title>Ten tips for writing key feature problems using generative AI</title>
            <p>This section provides ten practical and evidence-informed tips to help medical educators integrate generative AI into the design of KFPs. Each tip is aimed at ensuring that AI-generated questions are pedagogically sound, clinically relevant, and aligned with curricular goals. By applying these strategies, educators can enhance the quality of assessment tools used to evaluate clinical decision-making, while also improving the efficiency of content development.</p>
            <sec id="sec6">
                <title>Tip 1: Define learning outcomes and key features</title>
                <p>Before using generative AI to develop Key Feature Problems (KFPs), educators should first define clear, measurable learning outcomes and derive the corresponding key features. Key features represent the critical decisions or actions that determine effective clinical management (
                    <xref ref-type="bibr" rid="ref11">Farmer &amp; Page, 2005</xref>; 
                    <xref ref-type="bibr" rid="ref27">Nayer et al., 2018</xref>). Establishing these foundations ensures that AI-generated content is grounded in explicit educational intent and that each scenario targets competencies essential to clinical reasoning. Developing learning outcomes and key features in advance prevents the creation of unfocused or misaligned cases and supports validity by ensuring that each question assesses a decision point directly related to the intended outcome.</p>
                <p>Once the initial key features are identified, AI can assist in refining and expanding them. By analyzing large datasets or educational case repositories, AI can identify additional high-yield decision points that may not be apparent through manual analysis. Drawing upon diverse clinical information allows educators to uncover patterns and associations that enhance authenticity and completeness. This process strengthens alignment between curricular objectives and the reasoning steps that differentiate expert from novice performance (
                    <xref ref-type="bibr" rid="ref11">Farmer &amp; Page, 2005</xref>; 
                    <xref ref-type="bibr" rid="ref27">Nayer et al., 2018</xref>).</p>
                <p>

                    <bold>Example 1:</bold>
                </p>
                <p>

                    <bold>Learning Objective:</bold>
                </p>
                <p>Demonstrate the ability to diagnose and manage acute asthma in adult patients.</p>
                <p>

                    <bold>Identified Key Features:
</bold>

                    <list list-type="order">
                        <list-item>
                            <label>1.</label>
                            <p>Assess severity of the asthma exacerbation.</p>
                        </list-item>
                        <list-item>
                            <label>2.</label>
                            <p>Initiate immediate treatment.</p>
                        </list-item>
                        <list-item>
                            <label>3.</label>
                            <p>Decide on patient disposition (admission or discharge).</p>
                        </list-item>
                    </list>
                </p>
                <p>

                    <bold>AI-Generated Case (Short Clinical Vignette):</bold>
                </p>
                <p>A 30-year-old patient presents to the emergency department with shortness of breath and audible wheezing for the past two hours. The patient has a known history of asthma and seasonal allergies.</p>
                <p>

                    <bold>Key Feature Questions:</bold>

                    <list list-type="order">
                        <list-item>
                            <label>1.</label>
                            <p>(Write-in) What two clinical assessments are most important for determining the severity of this exacerbation?</p>
                        </list-item>
                        <list-item>
                            <label>2.</label>
                            <p>(Short-menu) Select the three most appropriate immediate treatments:
</p>
                        </list-item>
                    </list>

                    <list list-type="bullet">
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Inhaled &#x03b2;
                                <sub>2</sub>-agonist</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Systemic corticosteroid</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Oxygen therapy</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Antibiotic therapy</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Antihistamine</p>
                        </list-item>
                    </list>

                    <list list-type="order">
                        <list-item>
                            <label>3.</label>
                            <p>(Short-menu) Which criteria would guide your decision to discharge the patient? (Select all that apply.)</p>
                        </list-item>
                    </list>
                </p>
                <p>This sequence (learning outcome &#x2192; key features &#x2192; case &#x2192; questions) illustrates the structured logic of KFP design and specifies the item format and number of responses required, consistent with established methodology (
                    <xref ref-type="bibr" rid="ref11">Farmer &amp; Page, 2005</xref>; 
                    <xref ref-type="bibr" rid="ref27">Nayer et al., 2018</xref>).</p>
                <p>

                    <bold>Example 2:</bold>
                </p>
                <p>Educators can use AI tools to refine and extend preliminary key features for managing acute chest pain, such as:
                    <list list-type="order">
                        <list-item>
                            <label>1.</label>
                            <p>Obtaining an appropriate history and identifying red-flag symptoms,</p>
                        </list-item>
                        <list-item>
                            <label>2.</label>
                            <p>Initiating essential diagnostic investigations, and</p>
                        </list-item>
                        <list-item>
                            <label>3.</label>
                            <p>Deciding on immediate management priorities.</p>
                        </list-item>
                    </list>
                </p>
                <p>Large language models may reveal additional decision points, including:

                    <list list-type="bullet">
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Differentiating cardiac from non-cardiac causes (for example, pulmonary embolism or aortic dissection).</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Recognizing atypical presentations in diabetic or female patients.</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Applying risk stratification tools in clinical decision-making.</p>
                        </list-item>
                    </list>
                </p>
                <p>These refinements help ensure that the resulting KFPs capture a broader spectrum of clinical complexity and reflect authentic decision-making challenges encountered in practice (
                    <xref ref-type="bibr" rid="ref11">Farmer &amp; Page, 2005</xref>; 
                    <xref ref-type="bibr" rid="ref27">Nayer et al., 2018</xref>).</p>
                <p>

                    <bold>Note:</bold> KFPs may be presented as write-in or short-menu (SM) items. In SM formats, response options and the number of required selections must always be explicitly stated. At this stage, AI assists in improving the quality and breadth of key features, but the educator retains responsibility for selecting which AI-suggested features to include when constructing the final clinical vignette and corresponding questions.</p>
            </sec>
            <sec id="sec7">
                <title>Tip 2: Build authentic and context-rich clinical scenarios</title>
                <p>Creating realistic and contextually grounded clinical scenarios is essential to the educational value of KFPs. Once key features have been identified and refined, generative AI can be used to construct authentic vignettes that situate these decisions within believable clinical contexts (
                    <xref ref-type="bibr" rid="ref4">Berbenyuk et al., 2024</xref>; 
                    <xref ref-type="bibr" rid="ref31">Qiu &amp; Liu, 2025</xref>). By incorporating relevant demographic, environmental, and psychosocial details, AI helps simulate the complexity of real-world medical encounters (
                    <xref ref-type="bibr" rid="ref29">Potter &amp; Jefferies, 2024</xref>; 
                    <xref ref-type="bibr" rid="ref34">Sardesai et al., 2024</xref>).</p>
                <p>AI tools can also vary contextual parameters, such as disease stage, comorbidities, or resource limitations, to produce multiple versions of the same case. This contextual diversity strengthens students&#x2019; ability to transfer reasoning across scenarios and enhances case authenticity without adding to faculty workload (
                    <xref ref-type="bibr" rid="ref4">Berbenyuk et al., 2024</xref>; 
                    <xref ref-type="bibr" rid="ref16">Indran et al., 2024</xref>).</p>
                <p>

                    <bold>Example:</bold>
                </p>
                <p>

                    <bold>AI-Enhanced Realistic KFP Scenario (Short Vignette)</bold>
                </p>
                <p>Mr. Ali K., a 58-year-old taxi driver with long-standing hypertension and type 2 diabetes, arrives at a community clinic complaining of mild chest discomfort radiating to his jaw. He reports the pain began after climbing stairs 30 minutes ago and has gradually subsided. He takes metformin and amlodipine irregularly. Vital signs: BP 160/95 mmHg, HR 88 bpm, SpO
                    <sub>2</sub> 97%, BMI 31 kg/m
                    <sup>2</sup>. The nearest hospital is 25 km away.</p>
                <p>

                    <bold>Key Feature Questions:</bold>

                    <list list-type="order">
                        <list-item>
                            <label>1.</label>
                            <p>(Write-in) What initial clinical assessments are essential before deciding whether this patient can safely remain in the clinic? List up to two.</p>
                        </list-item>
                        <list-item>
                            <label>2.</label>
                            <p>(Short-menu) What are the two most critical diagnostic tests to confirm your leading diagnosis? (Select two.)
</p>
                        </list-item>
                    </list>

                    <list list-type="bullet">
                        <list-item>
                            <label>&#x2022;</label>
                            <p>12-lead ECG</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Cardiac troponin I</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Chest X-ray</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>D-dimer</p>
                        </list-item>
                    </list>

                    <list list-type="order">
                        <list-item>
                            <label>3.</label>
                            <p>(Short-menu) Which management action should be taken immediately? (Select one.)
</p>
                        </list-item>
                    </list>

                    <list list-type="bullet">
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Administer oral Aspirin and arrange urgent transfer</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Begin oral antihypertensive therapy and review next week</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Provide reassurance and schedule stress test</p>
                        </list-item>
                    </list>
                </p>
                <p>By prompting AI to integrate demographic, psychosocial, and logistical details, educators can generate scenarios that are not only clinically coherent but also contextually realistic (
                    <xref ref-type="bibr" rid="ref29">Potter &amp; Jefferies, 2024</xref>; 
                    <xref ref-type="bibr" rid="ref31">Qiu &amp; Liu, 2025</xref>). Such authenticity strengthens cognitive fidelity, meaning that decisions made in the scenario closely mirror real clinical reasoning, thereby enhancing learners&#x2019; engagement and readiness for practice (
                    <xref ref-type="bibr" rid="ref30">Preiksaitis &amp; Rose, 2023</xref>; 
                    <xref ref-type="bibr" rid="ref34">Sardesai et al., 2024</xref>).</p>
                <p>

                    <bold>Note:</bold> While AI can enhance realism, each generated scenario must undergo expert review to verify clinical accuracy and appropriateness for the target learner level (
                    <xref ref-type="bibr" rid="ref11">Farmer &amp; Page, 2005</xref>; 
                    <xref ref-type="bibr" rid="ref27">Nayer et al., 2018</xref>).</p>
            </sec>
            <sec id="sec8">
                <title>Tip 3: Generate scenario diversity and parallel case variants</title>
                <p>Generative AI can be strategically used to create multiple, pedagogically distinct versions of clinical scenarios centered on the same medical condition. This approach promotes both educational richness and psychometric robustness by exposing learners to varied but conceptually equivalent challenges (
                    <xref ref-type="bibr" rid="ref4">Berbenyuk et al., 2024</xref>; 
                    <xref ref-type="bibr" rid="ref16">Indran et al., 2024</xref>).</p>
                <p>By varying contextual elements such as patient demographics, comorbidities, access to resources, and disease stage, AI helps educators design cases that assess the transfer of learning rather than rote recall (
                    <xref ref-type="bibr" rid="ref15">Hrynchak et al., 2014</xref>). For instance, a single learning outcome on &#x201c;acute coronary syndrome management&#x201d; can be represented through different case variants: a young woman with atypical chest pain, an elderly diabetic with silent ischemia, or a middle-aged smoker with classic symptoms. Each variant targets the same underlying key features but tests adaptive reasoning in distinct contexts (
                    <xref ref-type="bibr" rid="ref11">Farmer &amp; Page, 2005</xref>; 
                    <xref ref-type="bibr" rid="ref27">Nayer et al., 2018</xref>).</p>
                <p>AI can also support psychometric balance by generating parallel cases matched on cognitive level and difficulty, aiding blueprinting and longitudinal assessment across cohorts (
                    <xref ref-type="bibr" rid="ref16">Indran et al., 2024</xref>). Through controlled prompting, educators can maintain item equivalence while ensuring content freshness and reduced cueing effects. This capacity is especially useful for formative assessments, progress tests, and multi-institutional benchmarking.</p>
                <p>

                    <bold>Example:</bold>
                </p>
                <p>

                    <bold>Learning Objective:</bold> Manage patients presenting with myocardial infarction.</p>
                <p>

                    <bold>Common Key Features:</bold>

                    <list list-type="order">
                        <list-item>
                            <label>1.</label>
                            <p>Identify ischemic symptoms and risk factors.</p>
                        </list-item>
                        <list-item>
                            <label>2.</label>
                            <p>Interpret ECG and cardiac biomarkers.</p>
                        </list-item>
                        <list-item>
                            <label>3.</label>
                            <p>Initiate evidence-based acute management.</p>
                        </list-item>
                    </list>
                </p>
                <p>By generating structured variants like these, AI helps educators evaluate consistency in reasoning across different contexts while maintaining construct validity. Moreover, such diversity supports inclusivity, ensuring exposure to a range of patient profiles and system-level challenges (
                    <xref ref-type="bibr" rid="ref26">Mishra et al., 2024</xref>; 
                    <xref ref-type="bibr" rid="ref36">Teferi et al., 2023</xref>).</p>
                <p>When educators vary contextual parameters such as disease stage, comorbidities, or resource limitations, AI can produce multiple case versions to strengthen transfer of reasoning (
                    <xref ref-type="table" rid="T1">
Table 1</xref>).</p>
                <table-wrap id="T1" orientation="portrait" position="float">
                    <label>
Table 1. </label>
                    <caption>
                        <title>AI-generated scenario variants.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">Scenario</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Contextual variation</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">
Key decision focus</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>Case A</bold>: 65-year-old male with classic ST-elevation MI in tertiary hospital</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Resource-rich environment</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Timely reperfusion decision</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>Case B</bold>: 48-year-old female with atypical symptoms and normal ECG in rural clinic</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Limited diagnostics available</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Decision to transfer or observe</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <bold>Case C</bold>: 72-year-old diabetic with dyspnea but no chest pain</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Comorbid and silent presentation</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Recognition of atypical MI</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
                <p>

                    <bold>Note:</bold> Each AI-generated variant should be reviewed for alignment with curricular outcomes and calibrated for difficulty using item analysis or expert consensus (
                    <xref ref-type="bibr" rid="ref11">Farmer &amp; Page, 2005</xref>; 
                    <xref ref-type="bibr" rid="ref27">Nayer et al., 2018</xref>).</p>
            </sec>
            <sec id="sec9">
                <title>Tip 4: Scaffold higher-order clinical reasoning</title>
                <p>Effective KFPs go beyond factual recall and assess a learner&#x2019;s ability to analyze, synthesize, and evaluate complex clinical information at the upper levels of Bloom&#x2019;s taxonomy (
                    <xref ref-type="bibr" rid="ref39">Zaidi et al., 2018</xref>). Generative AI can assist educators in scaffolding these higher-order cognitive processes by helping design questions that explicitly demand interpretation, prioritization, and reasoning rather than mere recognition (
                    <xref ref-type="bibr" rid="ref4">Berbenyuk et al., 2024</xref>; 
                    <xref ref-type="bibr" rid="ref16">Indran et al., 2024</xref>).</p>
                <p>By adjusting prompts and parameters, educators can use AI to generate versions of KFPs that target specific cognitive levels, for example, distinguishing between tasks that ask students to identify key findings (lower order) and those that require them to justify management decisions or evaluate competing interventions (higher order) (
                    <xref ref-type="bibr" rid="ref11">Farmer &amp; Page, 2005</xref>; 
                    <xref ref-type="bibr" rid="ref27">Nayer et al., 2018</xref>). This calibrated complexity enhances both formative and summative assessment design within competency-based curricula (
                    <xref ref-type="bibr" rid="ref17">Jantausch et al., 2023</xref>).</p>
                <p>AI can also suggest reasoning scaffolds such as stepwise justification prompts, conditional branching, or &#x201c;what-if&#x201d; variations that help learners articulate the logic behind their choices. When combined with faculty validation, these features turn KFPs into active reasoning exercises that closely resemble real-world diagnostic and management decision-making (
                    <xref ref-type="bibr" rid="ref3">Ara&#x00fa;jo et al., 2024</xref>).</p>
                <p>

                    <bold>Example:</bold>
                </p>
                <p>

                    <bold>Learning Objective:</bold> Apply critical reasoning to prioritize diagnostic steps in a patient with acute shortness of breath.</p>
                <p>

                    <bold>Key Features:</bold>

                    <list list-type="order">
                        <list-item>
                            <label>1.</label>
                            <p>Interpret initial presentation and vital signs.</p>
                        </list-item>
                        <list-item>
                            <label>2.</label>
                            <p>Identify the most urgent diagnostic investigation.</p>
                        </list-item>
                        <list-item>
                            <label>3.</label>
                            <p>Evaluate management priorities based on evolving information.</p>
                        </list-item>
                    </list>
                </p>
                <p>

                    <bold>AI-Generated Higher-Order Question Sequence</bold>

                    <list list-type="order">
                        <list-item>
                            <label>1.</label>
                            <p>(Write-in) Based on this patient&#x2019;s presentation, what are your leading differential diagnoses? List up to two.</p>
                        </list-item>
                        <list-item>
                            <label>2.</label>
                            <p>(Short-menu) Select the two investigations that will most efficiently confirm your diagnosis.</p>
                        </list-item>
                        <list-item>
                            <label>3.</label>
                            <p>(Write-in) The chest X-ray reveals a right-sided pneumothorax. Outline the next two management steps and justify their sequence.</p>
                        </list-item>
                    </list>
                </p>
                <p>This structure progresses from analysis to evaluation, showing how AI can scaffold increasing levels of cognitive complexity within a single clinical context.</p>
                <p>By refining prompts to elicit reasoning, educators ensure that AI-generated KFPs assess how students think, not just what they know (
                    <xref ref-type="bibr" rid="ref3">Ara&#x00fa;jo et al., 2024</xref>; 
                    <xref ref-type="bibr" rid="ref17">Jantausch et al., 2023</xref>).</p>
                <p>

                    <bold>Note:</bold> While AI can help generate cognitively rich content, final validation by subject-matter experts is essential to confirm that each question targets the intended cognitive level and aligns with learning outcomes (
                    <xref ref-type="bibr" rid="ref11">Farmer &amp; Page, 2005</xref>; 
                    <xref ref-type="bibr" rid="ref27">Nayer et al., 2018</xref>).</p>
            </sec>
            <sec id="sec10">
                <title>Tip 5: Align item complexity and format with learner level and curriculum</title>
                <p>Generative AI can accelerate the development of draft Key Feature Problems (KFPs), but educator oversight remains essential to ensure that items are constructively aligned with curricular outcomes, competency frameworks, and learner progression (
                    <xref ref-type="bibr" rid="ref11">Farmer &amp; Page, 2005</xref>; 
                    <xref ref-type="bibr" rid="ref14">Harden et al., 1999</xref>). AI-generated content should also be contextualized to the institution&#x2019;s clinical setting, patient population, and healthcare realities, enhancing authenticity and local relevance (
                    <xref ref-type="bibr" rid="ref4">Berbenyuk et al., 2024</xref>; 
                    <xref ref-type="bibr" rid="ref24">McLaughlin et al., 2019</xref>).</p>
                <p>Item complexity must match the learner&#x2019;s cognitive and experiential readiness. Early-phase students benefit from single-decision questions emphasizing recognition, while advanced learners should tackle multi-step cases demanding integration and prioritization (
                    <xref ref-type="bibr" rid="ref11">Farmer &amp; Page, 2005</xref>; 
                    <xref ref-type="bibr" rid="ref27">Nayer et al., 2018</xref>). AI can scaffold difficulty by varying diagnostic ambiguity, patient stability, or data availability, supporting progressive learning across preclinical and clinical phases (
                    <xref ref-type="bibr" rid="ref4">Berbenyuk et al., 2024</xref>; 
                    <xref ref-type="bibr" rid="ref16">Indran et al., 2024</xref>; 
                    <xref ref-type="bibr" rid="ref37">Tolsgaard et al., 2023</xref>).</p>
                <p>An illustrative example of progressive complexity across learner levels focused on managing diabetic ketoacidosis (DKA) is provided in 
                    <xref ref-type="table" rid="T2">
Table 2</xref>. This example demonstrates how item difficulty can be structured according to learner competence, from early recognition to advanced management and prioritization.</p>
                <table-wrap id="T2" orientation="portrait" position="float">
                    <label>
Table 2. </label>
                    <caption>
                        <title>Example learning outcome: Manage diabetic ketoacidosis (DKA) across varying levels of competence.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">
Learner level</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">AI-Generated focus</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Example question type</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Early learners</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Identify key diagnostic findings</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <italic toggle="yes">Write-in:</italic> List two laboratory findings confirming DKA.</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Intermediate learners</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Interpret severity and initiate management</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <italic toggle="yes">Short-menu:</italic> Select three immediate management steps.</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Advanced learners</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Prioritize interventions in unstable patient</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <italic toggle="yes">Write-in:</italic> Describe the sequence of management if the patient&#x2019;s blood pressure drops to 80/50 mmHg despite fluid resuscitation.</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
                <p>AI can further diversify assessment by reformatting a single clinical concept into multiple item types, such as short-answer, extended-matching, or multiple-response questions, while maintaining the same cognitive intent (
                    <xref ref-type="bibr" rid="ref16">Indran et al., 2024</xref>; 
                    <xref ref-type="bibr" rid="ref18">Javaeed, 2018</xref>). This enhances reliability and fairness by sampling reasoning across modalities and supports triangulation in programmatic assessment frameworks (
                    <xref ref-type="bibr" rid="ref7">Connor et al., 2020</xref>; 
                    <xref ref-type="bibr" rid="ref12">Fatima et al., 2024</xref>; 
                    <xref ref-type="bibr" rid="ref37">Tolsgaard et al., 2023</xref>).</p>
                <p>

                    <bold>Example:</bold>
                </p>
                <p>A 28-year-old man presents with sudden onset of severe shortness of breath after a long-haul flight. He is tachycardic and mildly hypoxic.</p>
                <p>

                    <bold>Original KFP:</bold>
                </p>
                <p>What is the most likely diagnosis, and what is the next immediate investigation? (<italic toggle="yes">Write-in</italic>).</p>
                <p>An illustrative transformation of a single vignette into multiple formats is provided in 
                    <xref ref-type="table" rid="T3">
Table 3</xref>.</p>
                <table-wrap id="T3" orientation="portrait" position="float">
                    <label>
Table 3. </label>
                    <caption>
                        <title>Illustrative example of how a single clinical vignette can be reformatted by AI into multiple item types while preserving cognitive intent and targeting different assessment foci.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">
Format</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">AI-Generated example</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">
Assessment focus</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Short menu</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Select the two most likely diagnoses: pulmonary embolism, pneumothorax, pneumonia, acute asthma.</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Diagnostic reasoning</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Extended-Matching
</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Select the next immediate investigation from a list applicable across short vignettes.</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Decision-making under time constraint</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Short-Answer
</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Explain the pathophysiological mechanism leading to this presentation.</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Integration of basic and clinical sciences</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Multiple-Response
</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Which of the following management steps should be taken immediately? (
                                    <italic toggle="yes">Select all that apply.</italic>)</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Prioritization and safety judgment</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
                <p>By aligning complexity, format, and learner stage, AI enables coherent and longitudinal assessment design that reinforces stage-appropriate competencies while maintaining curricular coherence (
                    <xref ref-type="bibr" rid="ref4">Berbenyuk et al., 2024</xref>; 
                    <xref ref-type="bibr" rid="ref14">Harden et al., 1999</xref>; 
                    <xref ref-type="bibr" rid="ref16">Indran et al., 2024</xref>).</p>
                <p>

                    <bold>Note:</bold> While AI can automate scaffolding and reformatting, faculty judgment remains indispensable to verify that each item accurately represents the intended cognitive process and meets clinical and psychometric standards (
                    <xref ref-type="bibr" rid="ref11">Farmer &amp; Page, 2005</xref>; 
                    <xref ref-type="bibr" rid="ref27">Nayer et al., 2018</xref>).</p>
            </sec>
            <sec id="sec11">
                <title>Tip 6: Validate items using the 5-step workflow</title>
                <p>The rapid generation of KFPs by generative AI demands a structured and defensible validation process to ensure that the resulting items meet accepted standards of quality, fairness, and educational relevance. To address this need, we adapted an evidence-based framework for validating AI-generated assessment content, drawing upon widely recognized validity models from 
                    <xref ref-type="bibr" rid="ref25">Messick (1995)</xref>, 
                    <xref ref-type="bibr" rid="ref19">Kane (2013)</xref>, 
                    <xref ref-type="bibr" rid="ref9">Downing (2002)</xref>, and 
                    <xref ref-type="bibr" rid="ref8">Cook et al. (2015)</xref>.</p>
                <p>This adapted process integrates principles of content validity, cognitive process verification, response process accuracy, internal structure coherence, and consequential validity, contextualized for AI-assisted item generation (
                    <xref ref-type="bibr" rid="ref11">Farmer &amp; Page, 2005</xref>; 
                    <xref ref-type="bibr" rid="ref27">Nayer et al., 2018</xref>; 
                    <xref ref-type="bibr" rid="ref37">Tolsgaard et al., 2023</xref>). It provides educators with a transparent and replicable structure for reviewing and approving AI-generated questions prior to implementation (
                    <xref ref-type="table" rid="T4">
Table 4</xref>).</p>
                <table-wrap id="T4" orientation="portrait" position="float">
                    <label>
Table 4. </label>
                    <caption>
                        <title>Adapted process for validating AI-generated questions.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">
Stage</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Purpose</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">
Validation evidence/Method</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">
Source framework</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">1. Content Validation</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Ensure alignment with curriculum outcomes and intended learning objectives.</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">SME review for relevance, accuracy, and blueprint mapping.</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">(
                                    <xref ref-type="bibr" rid="ref9">Downing, 2002</xref>; 
                                    <xref ref-type="bibr" rid="ref25">Messick, 1995</xref>)</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">2. Cognitive Process Validation</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Confirm that questions elicit the intended reasoning steps (analysis, synthesis, evaluation).</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Think-aloud or expert cognitive walkthrough of each question&#x2019;s reasoning pathway.</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">(
                                    <xref ref-type="bibr" rid="ref8">Cook et al., 2015</xref>)</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">3. Response Process Validation</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Verify that the expected student response corresponds to the key decision or action.</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Pilot testing with small student sample; collect verbal feedback.</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">(
                                    <xref ref-type="bibr" rid="ref8">Cook et al., 2015</xref>; 
                                    <xref ref-type="bibr" rid="ref19">Kane, 2013</xref>)</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">4. Internal Structure Validation</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Examine psychometric properties (difficulty, discrimination, reliability).</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Post-administration item analysis (CTT or IRT).</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">(
                                    <xref ref-type="bibr" rid="ref8">Cook et al., 2015</xref>; 
                                    <xref ref-type="bibr" rid="ref10">Downing, 2004</xref>)</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">5. Consequential Validation</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Evaluate educational impact and fairness.</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Review of learner performance data, feedback, and potential bias in AI outputs.</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">(
                                    <xref ref-type="bibr" rid="ref25">Messick, 1995</xref>)</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
                <p>This structured approach does not replace psychometric analysis but provides a pragmatic validity chain that educators can apply before large-scale deployment. Each step contributes evidence toward construct validity, ensuring that AI-generated KFPs assess genuine clinical reasoning rather than superficial pattern recognition (
                    <xref ref-type="bibr" rid="ref11">Farmer &amp; Page, 2005</xref>; 
                    <xref ref-type="bibr" rid="ref27">Nayer et al., 2018</xref>; 
                    <xref ref-type="bibr" rid="ref38">Wade et al., 2012</xref>).</p>
                <p>The overall process is visualized in 
                    <xref ref-type="fig" rid="f1">
Figure 1</xref>, which outlines the adapted five-step validation workflow for AI-generated assessment items.</p>
                <fig fig-type="figure" id="f1" orientation="portrait" position="float">
                    <label>
Figure 1. </label>
                    <caption>
                        <title>The 5-step validation process for AI-generated assessment items.</title>
                    </caption>
                    <graphic id="gr1" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/191236/2f5165c3-f8fa-4ee7-8fdf-4839d77a239e_figure1.gif"/>
                </fig>
                <p>

                    <bold>Example Application:</bold>
                </p>
                <p>Suppose AI generates a KFP on managing community-acquired pneumonia.
                    <list list-type="bullet">
                        <list-item>
                            <label>&#x2022;</label>
                            <p>

                                <bold>Stage 1:</bold> SMEs confirm the key features (diagnosis, antibiotic choice, admission criteria) match curricular outcomes.</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>

                                <bold>Stage 2:</bold> Cognitive walkthrough reveals the item requires decision-making rather than recall.</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>

                                <bold>Stage 3:</bold> A pilot group of students completes the item; feedback confirms clarity of question intent.</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>

                                <bold>Stage 4:</bold> Item analysis after pilot shows appropriate difficulty (p = 0.65) and discrimination (r = 0.32).</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>

                                <bold>Stage 5:</bold> Post-assessment debrief confirms students perceived the question as realistic and fair.</p>
                        </list-item>
                    </list>
                </p>
                <p>

                    <bold>Note:</bold> The five-step validation process is an adaptation of established assessment validity frameworks (
                    <xref ref-type="bibr" rid="ref8">Cook et al., 2015</xref>; 
                    <xref ref-type="bibr" rid="ref9">Downing, 2002</xref>; 
                    <xref ref-type="bibr" rid="ref19">Kane, 2013</xref>; 
                    <xref ref-type="bibr" rid="ref25">Messick, 1995</xref>), contextualized for the use of generative AI in question development. It aims to provide a practical quality-assurance model for educators rather than propose a novel psychometric paradigm.</p>
            </sec>
            <sec id="sec12">
                <title>Tip 7: Provide decision-specific, actionable feedback</title>
                <p>Effective feedback in KFPs must be decision-specific, concise, and actionable, focusing on each key feature rather than the case as a whole (
                    <xref ref-type="bibr" rid="ref11">Farmer &amp; Page, 2005</xref>; 
                    <xref ref-type="bibr" rid="ref15">Hrynchak et al., 2014</xref>; 
                    <xref ref-type="bibr" rid="ref27">Nayer et al., 2018</xref>). Well-designed feedback helps learners understand why a particular decision is correct and why alternatives are less appropriate. Generative AI can assist in drafting such targeted feedback rapidly, but its output must always undergo SME review to verify clinical accuracy, tone, and contextual sensitivity (
                    <xref ref-type="bibr" rid="ref11">Farmer &amp; Page, 2005</xref>; 
                    <xref ref-type="bibr" rid="ref27">Nayer et al., 2018</xref>; 
                    <xref ref-type="bibr" rid="ref40">Zhang et al., 2025</xref>).</p>
                <p>Generative AI can be prompted to produce feedback at different levels of granularity, as summarized in 
                    <xref ref-type="fig" rid="f2">
Figure 2</xref>, which illustrates how prompts can generate decision-specific feedback messages tailored to each key feature.</p>
                <fig fig-type="figure" id="f2" orientation="portrait" position="float">
                    <label>
Figure 2. </label>
                    <caption>
                        <title>Prompting AI to generate decision-specific feedback at multiple levels.</title>
                    </caption>
                    <graphic id="gr2" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/191236/2f5165c3-f8fa-4ee7-8fdf-4839d77a239e_figure2.gif"/>
                </fig>
                <p>
                    <list list-type="bullet">
                        <list-item>
                            <label>&#x2022;</label>
                            <p>

                                <bold>Per-key-feature rationales</bold> explaining both correct and incorrect choices, particularly valuable for short-menu (SM) items where learners must select a specified number of responses (
                                <xref ref-type="bibr" rid="ref11">Farmer &amp; Page, 2005</xref>; 
                                <xref ref-type="bibr" rid="ref27">Nayer et al., 2018</xref>).</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>

                                <bold>Tiered feedback messages</bold> for correct, partially correct, and incorrect responses that identify common reasoning errors and suggest appropriate next steps in decision-making (
                                <xref ref-type="bibr" rid="ref6">Burner et al., 2025</xref>; 
                                <xref ref-type="bibr" rid="ref23">Lee &amp; Moore, 2024</xref>).</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>

                                <bold>Counterfactual prompts</bold>, such as &#x201c;What if the patient were hypotensive?&#x201d;, which encourage reflective reasoning without revealing the answer (
                                <xref ref-type="bibr" rid="ref6">Burner et al., 2025</xref>; 
                                <xref ref-type="bibr" rid="ref23">Lee &amp; Moore, 2024</xref>).</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>

                                <bold>Clarity refinements</bold> using plain-language summaries or controlled length limits to improve accessibility for diverse learners (
                                <xref ref-type="bibr" rid="ref6">Burner et al., 2025</xref>; 
                                <xref ref-type="bibr" rid="ref23">Lee &amp; Moore, 2024</xref>).</p>
                        </list-item>
                    </list>
                </p>
                <p>

                    <bold>Timing also matters.</bold> For formative KFPs, immediate, key-feature-level feedback enhances learning efficiency and self-regulation (
                    <xref ref-type="bibr" rid="ref6">Burner et al., 2025</xref>; 
                    <xref ref-type="bibr" rid="ref23">Lee &amp; Moore, 2024</xref>). For summative KFPs, delayed or aggregate feedback preserves item security while still supporting post-exam reflection (
                    <xref ref-type="bibr" rid="ref11">Farmer &amp; Page, 2005</xref>; 
                    <xref ref-type="bibr" rid="ref27">Nayer et al., 2018</xref>).</p>
                <p>Despite its efficiency, AI-generated feedback may lack nuance and contextual sensitivity in complex or atypical cases, which highlights the need for human oversight, particularly in edge scenarios (
                    <xref ref-type="bibr" rid="ref6">Burner et al., 2025</xref>). SMEs should verify that AI feedback accurately targets the intended reasoning process and does not introduce misleading or unsafe guidance.</p>
                <p>

                    <bold>Illustrative Example (Write-in + Short-Menu with Feedback)</bold>
                </p>
                <p>

                    <bold>Scenario (abridged):</bold> A 28-year-old presents with fever, headache, and neck stiffness.</p>
                <p>

                    <bold>KF-Q1 (write-in):</bold> What is the most likely diagnosis?
                    <list list-type="bullet">
                        <list-item>
                            <label>&#x2022;</label>
                            <p>

                                <bold>Correct feedback:</bold> &#x201c;Bacterial meningitis is most consistent with fever and neck stiffness; treat urgently with empiric antibiotics.&#x201d;</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>

                                <bold>Partially correct (&#x2018;viral meningitis&#x2019;):</bold> &#x201c;Consider illness severity and urgency of treatment&#x2014;what findings suggest bacterial rather than viral?&#x201d;</p>
                        </list-item>
                    </list>
                </p>
                <p>

                    <bold>KF-Q2 (SM; select 2):</bold> Which initial diagnostic investigations are required?
                    <list list-type="bullet">
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Lumbar puncture</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Blood culture</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>CT head</p>
                        </list-item>
                    </list>
                </p>
            </sec>
            <sec id="sec13">
                <title>Tip 8: Refine items using performance and psychometric data</title>
                <p>Continuous improvement of AI-generated KFPs depends on systematic analysis of response data and psychometric evidence. Educators should employ both quantitative and qualitative data to identify items that require revision, strengthening validity, reliability, and alignment with learning outcomes (
                    <xref ref-type="bibr" rid="ref11">Farmer &amp; Page, 2005</xref>; 
                    <xref ref-type="bibr" rid="ref21">Kim et al., 2022</xref>; 
                    <xref ref-type="bibr" rid="ref27">Nayer et al., 2018</xref>; 
                    <xref ref-type="bibr" rid="ref37">Tolsgaard et al., 2023</xref>).</p>
                <p>Data sources include item statistics from pilot tests (difficulty, discrimination, non-functioning options) and learner feedback on clarity and realism. When analyzed together, these indicators reveal whether each KFP effectively assesses the intended decision point (
                    <xref ref-type="bibr" rid="ref1">Almansour &amp; Alfhaid, 2024</xref>; 
                    <xref ref-type="bibr" rid="ref37">Tolsgaard et al., 2023</xref>). For example, very low discrimination may indicate that the question does not differentiate between competent and struggling learners, while an unexpectedly high success rate may suggest over-cueing or insufficient cognitive demand (
                    <xref ref-type="bibr" rid="ref21">Kim et al., 2022</xref>).</p>
                <p>AI can support this process by generating revised item versions based on educator feedback or psychometric findings. Prompted appropriately, the model can reword stems for clarity, modify distractors for plausibility, or adjust contextual parameters to correct misalignment (
                    <xref ref-type="bibr" rid="ref4">Berbenyuk et al., 2024</xref>; 
                    <xref ref-type="bibr" rid="ref16">Indran et al., 2024</xref>). These revisions must then be revalidated by SMEs before reuse.</p>
                <p>

                    <bold>Illustrative Example (KFP Improvement via Data Review)</bold>
                </p>
                <p>

                    <bold>Original AI-Generated KFP (Pre-Revision)</bold>
                </p>
                <p>Scenario: A 35-year-old patient presents with pleuritic chest pain and mild dyspnea.</p>
                <p>

                    <bold>Question (Write-in):</bold> What is the most likely diagnosis?</p>
                <p>

                    <bold>Issue:</bold> Student response data showed poor discrimination (r = 0.05); many students misidentified the condition as pneumonia or pneumothorax.</p>
                <p>

                    <bold>Data Insight:</bold> Qualitative feedback revealed insufficient contextual clues to differentiate pulmonary embolism from other causes of chest pain.</p>
                <p>

                    <bold>Revised KFP (Post-Review)</bold>
                </p>
                <p>Scenario: A 35-year-old female on oral contraceptives presents with sudden pleuritic chest pain and mild dyspnea after a 10-hour flight.</p>
                <p>

                    <bold>Question (Write-in):</bold> What is the most likely diagnosis?</p>
                <p>

                    <bold>Rationale:</bold> Added risk factor and temporal trigger clarified the intended decision focus (PE) without making the question easier. SME review confirmed improved alignment and realism.</p>
                <p>This example demonstrates how data-driven iteration enhances clarity, construct validity, and clinical authenticity (
                    <xref ref-type="bibr" rid="ref11">Farmer &amp; Page, 2005</xref>; 
                    <xref ref-type="bibr" rid="ref27">Nayer et al., 2018</xref>). The implementation steps for this iterative process are illustrated in 
                    <xref ref-type="fig" rid="f3">
Figure 3</xref>, which presents the data-driven KFP improvement workflow.</p>
                <fig fig-type="figure" id="f3" orientation="portrait" position="float">
                    <label>
Figure 3. </label>
                    <caption>
                        <title>Implementation steps for data-driven KFP improvement.</title>
                    </caption>
                    <graphic id="gr3" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/191236/2f5165c3-f8fa-4ee7-8fdf-4839d77a239e_figure3.gif"/>
                </fig>
                <p>

                    <bold>Implementation Steps for Data-Driven KFP Improvement</bold>

                    <list list-type="order">
                        <list-item>
                            <label>1.</label>
                            <p>

                                <bold>Collect data</bold> from pilot or formative use (difficulty index, discrimination, and student feedback).</p>
                        </list-item>
                        <list-item>
                            <label>2.</label>
                            <p>

                                <bold>Analyze patterns</bold> to identify questions that fail to differentiate or that mislead due to ambiguous wording.</p>
                        </list-item>
                        <list-item>
                            <label>3.</label>
                            <p>

                                <bold>Prompt AI</bold> with explicit instructions for targeted revision (&#x201c;simplify stem language,&#x201d; &#x201c;add one contextual risk factor,&#x201d; etc.).</p>
                        </list-item>
                        <list-item>
                            <label>4.</label>
                            <p>

                                <bold>Revalidate</bold> revised items using the adapted validation framework (Tip 6).</p>
                        </list-item>
                        <list-item>
                            <label>5.</label>
                            <p>

                                <bold>Re-analyze</bold> post-revision metrics before including items in summative pools (
                                <xref ref-type="bibr" rid="ref11">Farmer &amp; Page, 2005</xref>; 
                                <xref ref-type="bibr" rid="ref27">Nayer et al., 2018</xref>; 
                                <xref ref-type="bibr" rid="ref37">Tolsgaard et al., 2023</xref>).</p>
                        </list-item>
                    </list>
                </p>
                <p>

                    <bold>Note:</bold> This process focuses solely on psychometric and content improvement. Considerations of inclusivity and bias mitigation are addressed separately (see Tip 9).</p>
            </sec>
            <sec id="sec14">
                <title>Tip 9: Safeguard equity, diversity, and inclusion in item content</title>
                <p>Equity, diversity, and inclusion (EDI) are essential principles in assessment design. In the context of Key Feature Problems (KFPs), EDI ensures that all learners engage with clinically authentic yet culturally fair scenarios that reflect the diversity of real-world patient populations (
                    <xref ref-type="bibr" rid="ref20">Kim et al., 2024</xref>; 
                    <xref ref-type="bibr" rid="ref37">Tolsgaard et al., 2023</xref>). When generative AI is used to create KFPs, additional vigilance is required to prevent the unintentional introduction or amplification of bias in case content, patient descriptors, or reasoning expectations (
                    <xref ref-type="bibr" rid="ref20">Kim et al., 2024</xref>; 
                    <xref ref-type="bibr" rid="ref33">Rodman et al., 2024</xref>).</p>
                <p>

                    <bold>Identify and Mitigate Potential Bias in AI Outputs</bold>
                </p>
                <p>AI models can inadvertently reproduce societal or dataset biases, leading to stereotypical patient profiles, imbalanced demographic representation, or culturally narrow assumptions (
                    <xref ref-type="bibr" rid="ref20">Kim et al., 2024</xref>).</p>
                <p>To prevent this, educators should:
                    <list list-type="bullet">
                        <list-item>
                            <label>&#x2022;</label>
                            <p>

                                <bold>Audit AI-generated cases</bold> for demographic balance across age, gender, ethnicity, and socioeconomic background.</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>

                                <bold>Remove stereotypical associations</bold> (e.g., linking certain diseases disproportionately to specific ethnic groups without epidemiological justification).</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>

                                <bold>Diversify contextual variables</bold>, such as healthcare setting, geographic region, and access to resources, to mirror real-world practice diversity (
                                <xref ref-type="bibr" rid="ref37">Tolsgaard et al., 2023</xref>).</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>

                                <bold>Involve diverse faculty reviewers and learners</bold> in item validation to surface biases that might be invisible to homogeneous panels (
                                <xref ref-type="bibr" rid="ref33">Rodman et al., 2024</xref>).</p>
                        </list-item>
                    </list>
                </p>
                <p>

                    <bold>Promote Inclusive Case Representation</bold>
                </p>
                <p>EDI-aligned KFPs should expose learners to the breadth of human variation and social determinants that influence diagnosis and management. AI can assist by generating case variants that represent different demographic or psychosocial contexts while maintaining equivalent cognitive challenge (
                    <xref ref-type="bibr" rid="ref4">Berbenyuk et al., 2024</xref>; 
                    <xref ref-type="bibr" rid="ref20">Kim et al., 2024</xref>).</p>
                <p>For example, a case on myocardial infarction can be rendered across:
                    <list list-type="bullet">
                        <list-item>
                            <label>&#x2022;</label>
                            <p>A younger female with atypical presentation,</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>An older diabetic male with silent ischemia, and</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>A rural patient with delayed access to emergency care.</p>
                        </list-item>
                    </list>
                </p>
                <p>Such diversity fosters equitable preparedness and reduces bias in clinical decision-making (
                    <xref ref-type="bibr" rid="ref20">Kim et al., 2024</xref>; 
                    <xref ref-type="bibr" rid="ref33">Rodman et al., 2024</xref>; 
                    <xref ref-type="bibr" rid="ref37">Tolsgaard et al., 2023</xref>).</p>
                <p>To operationalize inclusivity in AI-assisted KFP design, educators should follow a structured EDI review sequence illustrated in 
                    <xref ref-type="fig" rid="f4">
Figure 4</xref>, which outlines the bias-mitigation checkpoints during AI generation and validation.</p>
                <fig fig-type="figure" id="f4" orientation="portrait" position="float">
                    <label>
Figure 4. </label>
                    <caption>
                        <title>Bias mitigation checkpoints during AI-assisted item generation.</title>
                    </caption>
                    <graphic id="gr4" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/191236/2f5165c3-f8fa-4ee7-8fdf-4839d77a239e_figure4.gif"/>
                </fig>
                <p>

                    <bold>Integrate EDI Checks Into the KFP Workflow</bold>
                </p>
                <p>To embed EDI checks in the KFP workflow, educators should:
                    <list list-type="order">
                        <list-item>
                            <label>1.</label>
                            <p>

                                <bold>Set EDI parameters before prompting AI</bold>, specifying desired demographic distribution and case diversity.</p>
                        </list-item>
                        <list-item>
                            <label>2.</label>
                            <p>

                                <bold>Review all generated content</bold> with an EDI checklist (representation balance, language neutrality, accessibility).</p>
                        </list-item>
                        <list-item>
                            <label>3.</label>
                            <p>

                                <bold>Pilot-test</bold> questions across mixed learner groups to identify differential performance that could signal construct-irrelevant bias (
                                <xref ref-type="bibr" rid="ref33">Rodman et al., 2024</xref>; 
                                <xref ref-type="bibr" rid="ref37">Tolsgaard et al., 2023</xref>).</p>
                        </list-item>
                        <list-item>
                            <label>4.</label>
                            <p>

                                <bold>Document revisions</bold> and maintain transparency about the EDI review process as part of assessment governance.</p>
                        </list-item>
                    </list>
                </p>
                <p>

                    <bold>Original AI Output:</bold>
                </p>
                <p>A 45-year-old South Asian man with poorly controlled diabetes presents with chest pain after eating a heavy meal.</p>
                <p>

                    <bold>Issue:</bold> The AI model consistently associated &#x201c;South Asian&#x201d; with &#x201c;diabetes,&#x201d; reinforcing a stereotype without instructional purpose.</p>
                <p>

                    <bold>Revised Prompt and Case:</bold>
                </p>
                <p>Generate a case of a 45-year-old adult presenting with chest pain unrelated to ethnicity. Include relevant lifestyle and risk factors.</p>
                <p>

                    <bold>Result:</bold> The AI produced a balanced scenario highlighting modifiable risks (sedentary lifestyle, hypertension) rather than cultural identity, aligning better with fairness and learning objectives.</p>
                <p>

                    <bold>Note:</bold> EDI alignment is not a single review step but a continuous design principle that parallels psychometric validation. Each AI-generated KFP should undergo both content and equity review before use to ensure fairness, representation, and clinical authenticity (
                    <xref ref-type="bibr" rid="ref20">Kim et al., 2024</xref>; 
                    <xref ref-type="bibr" rid="ref33">Rodman et al., 2024</xref>).</p>
            </sec>
            <sec id="sec15">
                <title>Tip 10: Use AI ethically and document it transparently</title>
                <p>Use existing pre-trained models rather than developing new ones. Concentrate faculty effort on prompt design, SME review, and validity checks so AI output meets curricular and clinical standards (
                    <xref ref-type="bibr" rid="ref4">Berbenyuk et al., 2024</xref>; 
                    <xref ref-type="bibr" rid="ref22">Kovari, 2024</xref>; 
                    <xref ref-type="bibr" rid="ref37">Tolsgaard et al., 2023</xref>).</p>
                <p>

                    <bold>Document for auditability.</bold> For each item, record the tool/model and version used, prompt template (and key settings), SME comments, and validation outcomes (see Tip 6). This enables reproducibility and external review by faculty and accreditors (
                    <xref ref-type="bibr" rid="ref22">Kovari, 2024</xref>; 
                    <xref ref-type="bibr" rid="ref33">Rodman et al., 2024</xref>; 
                    <xref ref-type="bibr" rid="ref37">Tolsgaard et al., 2023</xref>).</p>
                <p>

                    <bold>Protect boundaries.</bold> Never upload identifiable learner or patient data to external tools; clarify authorship when AI contributes text or drafts; require human sign-off on all exam materials (
                    <xref ref-type="bibr" rid="ref22">Kovari, 2024</xref>; 
                    <xref ref-type="bibr" rid="ref37">Tolsgaard et al., 2023</xref>).</p>
                <p>

                    <bold>Build capacity.</bold> Provide ongoing faculty development in responsible prompting, data stewardship, and bias awareness so that AI augments educational expertise rather than replacing it (
                    <xref ref-type="bibr" rid="ref4">Berbenyuk et al., 2024</xref>; 
                    <xref ref-type="bibr" rid="ref16">Indran et al., 2024</xref>; 
                    <xref ref-type="bibr" rid="ref22">Kovari, 2024</xref>; 
                    <xref ref-type="bibr" rid="ref37">Tolsgaard et al., 2023</xref>).</p>
                <p>

                    <bold>Be pragmatic about clinical data.</bold> Until secure educational data environments mature, prefer synthetic or de-identified sources and simulated EHR interfaces; full interoperability with real systems is generally not feasible yet (
                    <xref ref-type="bibr" rid="ref5">Blau et al., 2024</xref>; 
                    <xref ref-type="bibr" rid="ref32">Razmi, 2024</xref>; 
                    <xref ref-type="bibr" rid="ref37">Tolsgaard et al., 2023</xref>).</p>
                <p>

                    <bold>Quick checklist (for your item bank record):</bold>

                    <list list-type="bullet">
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Tool/model + version</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Prompt template/context</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>SME reviewers + decisions</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Validation evidence (per Tip 6)</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Data handling and disclosure notes</p>
                        </list-item>
                    </list>
                </p>
                <p>This keeps AI use ethical, transparent, and sustainable while preserving assessment integrity.</p>
                <p>A consolidated overview of all ten tips, summarizing their purposes, recommended educator actions, and common pitfalls, is presented in 
                    <xref ref-type="table" rid="T5">
Table 5</xref>.</p>
                <table-wrap id="T5" orientation="portrait" position="float">
                    <label>
Table 5. </label>
                    <caption>
                        <title>Ten tips for AI-assisted KFP design: purpose, actions, and pitfalls.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">Tip</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Purpose</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Concrete actions</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Pitfall to avoid</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">1. Define learning outcomes and key features</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Anchor AI output to decisions that matter</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Write the LO. List 3&#x2013;5 key features. Prompt AI to refine 
                                    <italic toggle="yes">only</italic> those features. Keep SME-approved features.</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Letting AI invent new outcomes or drift from the blueprint</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">2. Build authentic, context-rich vignettes</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Increase cognitive fidelity and transfer</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Prompt for age/sex, comorbidities, setting, constraints; localize names, drugs, and guidelines.</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Generic, placeless cases misaligned with local practice</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">3. Generate scenario diversity and parallel variants</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Support progress testing and reduce cueing</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Create 3&#x2013;4 variants that keep key features but change demographics, severity, and setting; tag each variant.</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Changing the construct or difficulty too much across variants</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">4. Scaffold higher-order reasoning</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Move beyond recall to clinical reasoning</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Sequence prompts: identify &#x2192; interpret &#x2192; prioritize &#x2192; justify; add &#x201c;what-if&#x201d; branches.</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Single-step items solvable by pattern recognition</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">5. Align complexity &amp; format with learner level and curriculum</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Keep items fair and teachable for the target group</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">State learner level/course; tune data load, ambiguity, and steps; select format (write-in/SM/EMQ) to match intent.</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Reusing high-complexity items for early learners</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">6. Validate items using the 5-step workflow</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Make items defensible before high-stakes use</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Document content SME check, cognitive walkthrough, small-group response check, item analysis, consequences review.</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Treating AI output as final or skipping documentation</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">7. Provide decision-specific, actionable feedback</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Turn KFPs into formative tools</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Draft per-key-feature feedback for correct/partial/incorrect; SMEs edit for safety and tone.</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Global case summaries that ignore the exact decision error</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">8. Refine using performance &amp; psychometrics</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Close the loop with real data</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Review item difficulty (p-value), discrimination, distractor use, and comments; prompt AI for targeted rewrites; re-validate.</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Keeping weak items in the bank without revision</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">9. Safeguard equity, diversity &amp; inclusion</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Prevent construct-irrelevant bias</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Set EDI parameters in prompts; audit representation and language; pilot across mixed groups; record EDI review.</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Stereotypes, single-setting/single-demographic fixation</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">10. Use AI ethically &amp; document transparently</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Protect security, trust, and auditability</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Prefer pre-trained models; record tool/version, prompts, SME decisions, validation evidence; avoid identifiable data; provide faculty PD; use synthetic/de-identified clinical data.</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Uploading identifiable data or omitting disclosure/governance</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
            </sec>
            <sec id="sec16">
                <title>Key takeaways</title>
                <p>

                    <list list-type="bullet">
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Start with outcomes and key features; keep AI inside those boundaries.</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Build realism and parallel variants to test transfer, not recall.</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Calibrate complexity and format to learner level, then validate with a simple 5-step workflow.</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Feedback must be decision-specific; use post-delivery data to iterate.</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Bake in EDI checks to avoid bias and construct-irrelevant variance.</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Treat AI as an assistant: document tools, prompts, SME decisions, and data handling; never upload identifiable data.</p>
                        </list-item>
                    </list>
                </p>
            </sec>
            <sec id="sec17">
                <title>Limitations and scope</title>
                <p>This paper is intended as a practice-oriented guide rather than an empirical or psychometric validation study. Its focus is on the educational design and responsible use of generative artificial intelligence (AI) to assist in developing Key Feature Problems (KFPs) within undergraduate (UME) and postgraduate (PGME) medical education contexts. The recommendations emphasize conceptual alignment, item quality, and governance rather than quantitative analysis of reliability, validity coefficients, or statistical performance metrics.</p>
                <p>The scope of guidance also excludes blueprinting logistics, standard setting, and scoring procedures, which vary across institutions and are beyond the current discussion. While examples provided illustrate typical clinical reasoning domains, they are intended to demonstrate design principles rather than to serve as validated assessment items.</p>
                <p>Implementation feasibility may differ depending on institutional infrastructure, data governance maturity, and faculty readiness. The principles described should therefore be adapted to local curricular frameworks, regulatory requirements, and available AI tools. Educators should interpret these tips as a foundation for responsible innovation and not as a prescriptive or exhaustive model for KFP development.</p>
            </sec>
        </sec>
        <sec id="sec18" sec-type="conclusion">
            <title>Conclusion</title>
            <p>This article offers a practical pathway for integrating generative AI into Key Feature Problem design while preserving educational rigor, fairness, and clinical authenticity. The ten tips anchor AI use to clearly defined outcomes and key features; they promote authentic, context-rich vignettes and parallel variants; they scaffold higher-order reasoning rather than simple recall; and they require systematic validation, targeted feedback, and continuous psychometric refinement. Applied together, these practices turn AI from a novelty into a reliable assistant that strengthens the defensibility and learning value of KFPs within programmatic assessment.</p>
            <p>Effective implementation depends on disciplined process rather than advanced modeling. Institutions should prioritize transparent documentation of tools, prompts, SME decisions, and validation evidence; embed equity checks to reduce construct-irrelevant variance; and provide ongoing faculty development in responsible prompting, data stewardship, and bias awareness. Until secure educational data environments mature (e.g., institutionally hosted sandboxes), realism can be achieved through synthetic or de-identified data and simulated EHR interfaces. These guardrails protect privacy and trust while allowing innovation to advance in manageable, auditable steps.</p>
            <p>Adopting the ten tips can improve both reliability and educational impact. Items become better aligned to curricular intent and learner level, feedback becomes decision-specific and actionable, and post-administration data drive iterative improvement rather than one-off item use. In this way, AI-supported KFPs contribute to a more coherent and equitable assessment ecosystem that helps learners practice clinical reasoning and transfer it to new settings.</p>
            <p>Future work should test these recommendations at scale. Priorities include prospective studies on learning outcomes, stability of psychometric indices across cohorts and subgroups, the effectiveness of bias and equity audits, and the operational value of documentation checklists for accreditation. Cross-institution collaborations and shared repositories of prompts, validation artifacts, and item revision histories will accelerate cumulative knowledge. With careful governance and continuous evaluation, AI can augment rather than replace educational expertise and help institutions deliver assessments that are authentic, defensible, and oriented toward better patient care.</p>
        </sec>
    </body>
    <back>
        <sec id="sec21" sec-type="data-availability">
            <title>Data availability</title>
            <p>No datasets were generated or analyzed during the preparation of this article. Therefore, data sharing is not applicable.</p>
        </sec>
        <ref-list>
            <title>References</title>
            <ref id="ref1">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Almansour</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Alfhaid</surname>
                            <given-names>FM</given-names>
                        </name>
</person-group>:
                    <article-title>Generative artificial intelligence and the personalization of health professional education: A narrative review.</article-title>
                    <source>

                        <italic toggle="yes">Medicine.</italic>
</source>
                    <year>2024</year>;<volume>103</volume>(<issue>31</issue>):<fpage>e38955</fpage>.
                    <pub-id pub-id-type="pmid">39093806</pub-id>
                    <pub-id pub-id-type="doi">10.1097/MD.0000000000038955</pub-id>
                    <pub-id pub-id-type="pmcid">PMC11296413</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref2">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Andrade</surname>
                            <given-names>G d SA</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Alves</surname>
                            <given-names>GP</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Melo</surname>
                            <given-names>TA</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Ethical reasoning in medical decisions: the physician-patient dilemma.</article-title>
                    <source>

                        <italic toggle="yes">Revista Bio&#x00e9;tica.</italic>
</source>
                    <year>2024</year>;<volume>32</volume>.
                    <pub-id pub-id-type="doi">10.1590/1983-803420243658EN</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref3">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Ara&#x00fa;jo</surname>
                            <given-names>B</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Gomes</surname>
                            <given-names>SF</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ribeiro</surname>
                            <given-names>L</given-names>
                        </name>
</person-group>:
                    <article-title>Critical thinking pedagogical practices in medical education: a systematic review.</article-title>
                    <source>

                        <italic toggle="yes">Front. Med.</italic>
</source>
                    <year>2024</year>;<volume>11</volume>.
                    <pub-id pub-id-type="pmid">38947238</pub-id>
                    <pub-id pub-id-type="doi">10.3389/FMED.2024.1358444</pub-id>
                    <pub-id pub-id-type="pmcid">PMC11211358</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref4">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Berbenyuk</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Powell</surname>
                            <given-names>L</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Zary</surname>
                            <given-names>N</given-names>
                        </name>
</person-group>:
                    <article-title>Feasibility and Educational Value of Clinical Cases Generated Using Large Language Models.</article-title>
                    <source>

                        <italic toggle="yes">Stud. Health Technol. Inform.</italic>
</source>
                    <year>2024</year>;<volume>316</volume>:<fpage>1524</fpage>&#x2013;<lpage>1528</lpage>.
                    <pub-id pub-id-type="pmid">39176494</pub-id>
                    <pub-id pub-id-type="doi">10.3233/shti240705</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref5">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Blau</surname>
                            <given-names>W</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Cerf</surname>
                            <given-names>VG</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Enriquez</surname>
                            <given-names>J</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Protecting scientific integrity in an age of generative AI.</article-title>
                    <source>

                        <italic toggle="yes">Proc. Natl. Acad. Sci. USA.</italic>
</source>
                    <year>2024</year>;<volume>121</volume>:<fpage>e2407886121</fpage>.
                    <pub-id pub-id-type="pmid">38771193</pub-id>
                    <pub-id pub-id-type="doi">10.1073/pnas.2407886121</pub-id>
                    <pub-id pub-id-type="pmcid">PMC11145223</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref6">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Burner</surname>
                            <given-names>T</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Lindvig</surname>
                            <given-names>Y</given-names>
                        </name>

                        <name name-style="western">
                            <surname>W&#x00e6;rness</surname>
                            <given-names>JI</given-names>
                        </name>
</person-group>:
                    <article-title>&#x201c;We Should Not Be Like a Dinosaur&#x201d;&#x2014;Using AI Technologies to Provide Formative Feedback to Students.</article-title>
                    <source>

                        <italic toggle="yes">Educ. Sci.</italic>
</source>
                    <year>2025</year>;<volume>15</volume>(<issue>1</issue>):<fpage>58</fpage>.
                    <pub-id pub-id-type="doi">10.3390/educsci15010058</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref7">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Connor</surname>
                            <given-names>DM</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Durning</surname>
                            <given-names>SJ</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Rencic</surname>
                            <given-names>JJ</given-names>
                        </name>
</person-group>:
                    <article-title>Clinical Reasoning as a Core Competency.</article-title>
                    <source>

                        <italic toggle="yes">Acad. Med.</italic>
</source>
                    <year>2020</year>;<volume>95</volume>(<issue>8</issue>):<fpage>1166</fpage>&#x2013;<lpage>1171</lpage>.
                    <pub-id pub-id-type="doi">10.1097/acm.0000000000003027</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref8">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Cook</surname>
                            <given-names>DA</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Brydges</surname>
                            <given-names>R</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ginsburg</surname>
                            <given-names>S</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>A contemporary approach to validity arguments: a practical guide to Kane&#x2019;s framework.</article-title>
                    <source>

                        <italic toggle="yes">Med. Educ.</italic>
</source>
                    <year>2015</year>;<volume>49</volume>(<issue>6</issue>):<fpage>560</fpage>&#x2013;<lpage>575</lpage>.
                    <pub-id pub-id-type="pmid">25989405</pub-id>
                    <pub-id pub-id-type="doi">10.1111/medu.12678</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref9">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Downing</surname>
                            <given-names>SM</given-names>
                        </name>
</person-group>:
                    <article-title>Threats to the validity of locally developed multiple-choice tests in medical education: construct-irrelevant variance and construct underrepresentation.</article-title>
                    <source>

                        <italic toggle="yes">Adv. Health Sci. Educ. Theory Pract.</italic>
</source>
                    <year>2002</year>;<volume>7</volume>(<issue>3</issue>):<fpage>235</fpage>&#x2013;<lpage>241</lpage>.
                    <pub-id pub-id-type="pmid">12510145</pub-id>
                    <pub-id pub-id-type="doi">10.1023/A:1021112514626</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref10">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Downing</surname>
                            <given-names>SM</given-names>
                        </name>
</person-group>:
                    <article-title>Reliability: on the reproducibility of assessment data.</article-title>
                    <source>

                        <italic toggle="yes">Med. Educ.</italic>
</source>
                    <year>2004</year>;<volume>38</volume>(<issue>9</issue>):<fpage>1006</fpage>&#x2013;<lpage>1012</lpage>.
                    <pub-id pub-id-type="doi">10.1111/j.1365-2929.2004.01932.x</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref11">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Farmer</surname>
                            <given-names>EA</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Page</surname>
                            <given-names>G</given-names>
                        </name>
</person-group>:
                    <article-title>A practical guide to assessing clinical decision-making skills using the key features approach.</article-title>
                    <source>

                        <italic toggle="yes">Med. Educ.</italic>
</source>
                    <year>2005</year>;<volume>39</volume>(<issue>12</issue>):<fpage>1188</fpage>&#x2013;<lpage>1194</lpage>.
                    <pub-id pub-id-type="pmid">16313577</pub-id>
                    <pub-id pub-id-type="doi">10.1111/j.1365-2929.2005.02339.x</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref12">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Fatima</surname>
                            <given-names>SS</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Sheikh</surname>
                            <given-names>NA</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Osama</surname>
                            <given-names>A</given-names>
                        </name>
</person-group>:
                    <article-title>Authentic assessment in medical education: exploring AI integration and student-as-partners collaboration.</article-title>
                    <source>

                        <italic toggle="yes">Postgrad. Med. J.</italic>
</source>
                    <year>2024</year>;<volume>100</volume>(<issue>1190</issue>):<fpage>959</fpage>&#x2013;<lpage>967</lpage>.
                    <pub-id pub-id-type="pmid">39041454</pub-id>
                    <pub-id pub-id-type="doi">10.1093/postmj/qgae088</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref13">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Franco D&#x2019;Souza</surname>
                            <given-names>R</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Mathew</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Mishra</surname>
                            <given-names>V</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Twelve tips for addressing ethical concerns in the implementation of artificial intelligence in medical education.</article-title>
                    <source>

                        <italic toggle="yes">Med. Educ. Online.</italic>
</source>
                    <year>2024</year>;<volume>29</volume>(<issue>1</issue>).
                    <pub-id pub-id-type="pmid">38566608</pub-id>
                    <pub-id pub-id-type="doi">10.1080/10872981.2024.2330250</pub-id>
                    <pub-id pub-id-type="pmcid">PMC10993743</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref14">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Harden</surname>
                            <given-names>RM</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Crosby</surname>
                            <given-names>JR</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Davis</surname>
                            <given-names>MH</given-names>
                        </name>
</person-group>:
                    <article-title>AMEE Guide No. 14: Outcome-based education: Part 1 - An introduction to outcome-based education.</article-title>
                    <source>

                        <italic toggle="yes">Med. Teach.</italic>
</source>
                    <year>1999</year>;<volume>21</volume>(<issue>1</issue>):<fpage>7</fpage>&#x2013;<lpage>14</lpage>.
                    <pub-id pub-id-type="doi">10.1080/01421599979969</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref15">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Hrynchak</surname>
                            <given-names>P</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Glover Takahashi</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Nayer</surname>
                            <given-names>M</given-names>
                        </name>
</person-group>:
                    <article-title>Key-feature questions for assessment of clinical reasoning: a literature review.</article-title>
                    <source>

                        <italic toggle="yes">Med. Educ.</italic>
</source>
                    <year>2014</year>;<volume>48</volume>(<issue>9</issue>):<fpage>870</fpage>&#x2013;<lpage>883</lpage>.
                    <pub-id pub-id-type="pmid">25113114</pub-id>
                    <pub-id pub-id-type="doi">10.1111/medu.12509</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref16">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Indran</surname>
                            <given-names>IR</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Paranthaman</surname>
                            <given-names>P</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Gupta</surname>
                            <given-names>N</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Twelve tips to leverage AI for efficient and effective medical question generation: A guide for educators using Chat GPT.</article-title>
                    <source>

                        <italic toggle="yes">Med. Teach.</italic>
</source>
                    <year>2024</year>;<volume>46</volume>(<issue>8</issue>):<fpage>1021</fpage>&#x2013;<lpage>1026</lpage>.
                    <pub-id pub-id-type="pmid">38146711</pub-id>
                    <pub-id pub-id-type="doi">10.1080/0142159X.2023.2294703</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref17">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Jantausch</surname>
                            <given-names>BA</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Bost</surname>
                            <given-names>JE</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Bhansali</surname>
                            <given-names>P</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Assessing trainee critical thinking skills using a novel interactive online learning tool.</article-title>
                    <source>

                        <italic toggle="yes">Med. Educ. Online.</italic>
</source>
                    <year>2023</year>;<volume>28</volume>(<issue>1</issue>):<fpage>2178871</fpage>.
                    <pub-id pub-id-type="pmid">36871259</pub-id>
                    <pub-id pub-id-type="doi">10.1080/10872981.2023.2178871</pub-id>
                    <pub-id pub-id-type="pmcid">PMC9987719</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref18">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Javaeed</surname>
                            <given-names>A</given-names>
                        </name>
</person-group>:
                    <article-title>Assessment of Higher Ordered Thinking in Medical Education: Multiple Choice Questions and Modified Essay Questions.</article-title>
                    <source>

                        <italic toggle="yes">MedEdPublish.</italic>
</source>
                    <year>2018</year>;<volume>7</volume>:<fpage>128</fpage>.
                    <pub-id pub-id-type="pmid">38074575</pub-id>
                    <pub-id pub-id-type="doi">10.15694/mep.2018.0000128.1</pub-id>
                    <pub-id pub-id-type="pmcid">PMC10699377</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref19">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Kane</surname>
                            <given-names>MT</given-names>
                        </name>
</person-group>:
                    <article-title>Validating the Interpretations and Uses of Test Scores.</article-title>
                    <source>

                        <italic toggle="yes">J. Educ. Meas.</italic>
</source>
                    <year>2013</year>;<volume>50</volume>(<issue>1</issue>):<fpage>1</fpage>&#x2013;<lpage>73</lpage>.
                    <pub-id pub-id-type="doi">10.1111/jedm.12000</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref20">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Kim</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ham</surname>
                            <given-names>Y</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Lee</surname>
                            <given-names>S-S</given-names>
                        </name>
</person-group>:
                    <article-title>Differences in student-AI interaction process on a drawing task: Focusing on students&#x2019; attitude towards AI and the level of drawing skills.</article-title>
                    <source>

                        <italic toggle="yes">Australas. J. Educ. Technol.</italic>
</source>
                    <year>2024</year>.
                    <pub-id pub-id-type="doi">10.14742/ajet.8859</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref21">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Kim</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Lee</surname>
                            <given-names>H</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Cho</surname>
                            <given-names>YH</given-names>
                        </name>
</person-group>:
                    <article-title>Learning design to support student-AI collaboration: perspectives of leading teachers for AI in education.</article-title>
                    <source>

                        <italic toggle="yes">Educ. Inf. Technol.</italic>
</source>
                    <year>2022</year>;<volume>27</volume>(<issue>5</issue>):<fpage>6069</fpage>&#x2013;<lpage>6104</lpage>.
                    <pub-id pub-id-type="doi">10.1007/s10639-021-10831-6</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref22">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Kovari</surname>
                            <given-names>A</given-names>
                        </name>
</person-group>:
                    <article-title>Ethical use of ChatGPT in education&#x2014;Best practices to combat AI-induced plagiarism.</article-title>
                    <source>

                        <italic toggle="yes">Frontiers in Education.</italic>
</source>
                    <year>2024</year>;<volume>9</volume>:<fpage>1465703</fpage>.
                    <pub-id pub-id-type="doi">10.3389/feduc.2024.1465703</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref23">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Lee</surname>
                            <given-names>SS</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Moore</surname>
                            <given-names>RL</given-names>
                        </name>
</person-group>:
                    <article-title>Harnessing Generative AI (GenAI) for Automated Feedback in Higher Education: A Systematic Review.</article-title>
                    <source>

                        <italic toggle="yes">Online Learning.</italic>
</source>
                    <year>2024</year>;<volume>28</volume>(<issue>3</issue>):<fpage>82</fpage>&#x2013;<lpage>104</lpage>.
                    <pub-id pub-id-type="doi">10.24059/olj.v28i3.4593</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref24">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>McLaughlin</surname>
                            <given-names>JE</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Wolcott</surname>
                            <given-names>MD</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Hubbard</surname>
                            <given-names>D</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>A qualitative review of the design thinking framework in health professions education.</article-title>
                    <source>

                        <italic toggle="yes">BMC Med. Educ.</italic>
</source>
                    <year>2019</year>;<volume>19</volume>(<issue>1</issue>):<fpage>98</fpage>.
                    <pub-id pub-id-type="pmid">30947748</pub-id>
                    <pub-id pub-id-type="doi">10.1186/s12909-019-1528-8</pub-id>
                    <pub-id pub-id-type="pmcid">PMC6449899</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref25">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Messick</surname>
                            <given-names>S</given-names>
                        </name>
</person-group>:
                    <article-title>Validity of psychological assessment: Validation of inferences from persons&#x2019; responses and performances as scientific inquiry into score meaning.</article-title>
                    <source>

                        <italic toggle="yes">Am. Psychol.</italic>
</source>
                    <year>1995</year>;<volume>50</volume>(<issue>9</issue>):<fpage>741</fpage>&#x2013;<lpage>749</lpage>.
                    <pub-id pub-id-type="doi">10.1037/0003-066X.50.9.741</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref26">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Mishra</surname>
                            <given-names>C</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Farooqui</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Shimna</surname>
                            <given-names>CS</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>The Role of Artificial Intelligence in Improving Medical Education: A Comprehensive Review.</article-title>
                    <source>

                        <italic toggle="yes">Advancement and New Understanding in Medical Science.</italic>
</source>
                    <year>2024</year>;<volume>7</volume>:<fpage>81</fpage>&#x2013;<lpage>101</lpage>.
                    <pub-id pub-id-type="doi">10.9734/bpi/anums/v7/7333b</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref27">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Nayer</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Glover Takahashi</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Hrynchak</surname>
                            <given-names>P</given-names>
                        </name>
</person-group>:
                    <article-title>Twelve tips for developing key-feature questions (KFQ) for effective assessment of clinical reasoning.</article-title>
                    <source>

                        <italic toggle="yes">Med. Teach.</italic>
</source>
                    <year>2018</year>;<volume>40</volume>(<issue>11</issue>):<fpage>1116</fpage>&#x2013;<lpage>1122</lpage>.
                    <pub-id pub-id-type="pmid">30001652</pub-id>
                    <pub-id pub-id-type="doi">10.1080/0142159X.2018.1481281</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref28">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Page</surname>
                            <given-names>G</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Bordage</surname>
                            <given-names>G</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Allen</surname>
                            <given-names>T</given-names>
                        </name>
</person-group>:
                    <article-title>Developing key-feature problems and examinations to assess clinical decision-making skills.</article-title>
                    <source>

                        <italic toggle="yes">Acad. Med.</italic>
</source>
                    <year>1995</year>;<volume>70</volume>(<issue>3</issue>):<fpage>194</fpage>&#x2013;<lpage>201</lpage>.
                    <pub-id pub-id-type="pmid">7873006</pub-id>
                    <pub-id pub-id-type="doi">10.1097/00001888-199503000-00009</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref29">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Potter</surname>
                            <given-names>L</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Jefferies</surname>
                            <given-names>C</given-names>
                        </name>
</person-group>:
                    <article-title>Enhancing communication and clinical reasoning in medical education: Building virtual patients with generative AI.</article-title>
                    <source>

                        <italic toggle="yes">Future Healthcare Journal.</italic>
</source>
                    <year>2024</year>;<volume>11</volume>:<fpage>100043</fpage>.
                    <pub-id pub-id-type="doi">10.1016/j.fhj.2024.100043</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref30">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Preiksaitis</surname>
                            <given-names>C</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Rose</surname>
                            <given-names>C</given-names>
                        </name>
</person-group>:
                    <article-title>Opportunities, Challenges, and Future Directions of Generative Artificial Intelligence in Medical Education: Scoping Review.</article-title>
                    <source>

                        <italic toggle="yes">JMIR Medical Education.</italic>
</source>
                    <year>2023</year>;<volume>9</volume>:<fpage>e48785</fpage>.
                    <pub-id pub-id-type="pmid">37862079</pub-id>
                    <pub-id pub-id-type="doi">10.2196/48785</pub-id>
                    <pub-id pub-id-type="pmcid">PMC10625095</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref31">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Qiu</surname>
                            <given-names>Y</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Liu</surname>
                            <given-names>C</given-names>
                        </name>
</person-group>:
                    <article-title>Capable exam-taker and question-generator: the dual role of generative AI in medical education assessment.</article-title>
                    <source>

                        <italic toggle="yes">Global Medical Education.</italic>
</source>
                    <year>2025</year>.
                    <pub-id pub-id-type="doi">10.1515/gme-2024-0021</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref32">
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Razmi</surname>
                            <given-names>RM</given-names>
                        </name>
</person-group>:
                    <chapter-title>Building Robust Medical Algorithms.</chapter-title>
                    <source>

                        <italic toggle="yes">AI Doctor.</italic>
</source>
                    <year>2024</year>;<fpage>27</fpage>&#x2013;<lpage>65</lpage>.
                    <pub-id pub-id-type="doi">10.1002/9781394240197.ch2</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref33">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Rodman</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Mark</surname>
                            <given-names>NM</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Artino</surname>
                            <given-names>AR</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Using Generative Artificial Intelligence in Medical Education.</article-title>
                    <source>

                        <italic toggle="yes">Acad. Med.</italic>
</source>
                    <year>2024</year>;<volume>100</volume>(<issue>2</issue>):<fpage>250</fpage>.
                    <pub-id pub-id-type="doi">10.1097/acm.0000000000005937</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref34">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Sardesai</surname>
                            <given-names>N</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Russo</surname>
                            <given-names>P</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Martin</surname>
                            <given-names>J</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Utilizing generative conversational artificial intelligence to create simulated patient encounters: a pilot study for anaesthesia training.</article-title>
                    <source>

                        <italic toggle="yes">Postgrad. Med. J.</italic>
</source>
                    <year>2024</year>;<volume>100</volume>(<issue>1182</issue>):<fpage>237</fpage>&#x2013;<lpage>241</lpage>.
                    <pub-id pub-id-type="pmid">38240054</pub-id>
                    <pub-id pub-id-type="doi">10.1093/postmj/qgad137</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref35">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Subaveerapandiyan</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Mvula</surname>
                            <given-names>D</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ahmad</surname>
                            <given-names>N</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Assessing AI literacy and attitudes among medical students: implications for integration into healthcare practice.</article-title>
                    <source>

                        <italic toggle="yes">J. Health Organ. Manag.</italic>
</source>
                    <year>2024</year>.
                    <pub-id pub-id-type="doi">10.1108/jhom-04-2024-0154</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref36">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Teferi</surname>
                            <given-names>B</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Omar</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Jeyakumar</surname>
                            <given-names>T</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Accelerating the Appropriate Adoption of Artificial Intelligence in Health Care: Prioritizing IDEA to Champion a Collaborative Educational Approach in a Stressed System.</article-title>
                    <source>

                        <italic toggle="yes">Educ. Sci.</italic>
</source>
                    <year>2023</year>;<volume>14</volume>(<issue>1</issue>):<fpage>39</fpage>.
                    <pub-id pub-id-type="doi">10.3390/educsci14010039</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref37">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Tolsgaard</surname>
                            <given-names>MG</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Pusic</surname>
                            <given-names>MV</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Sebok-Syer</surname>
                            <given-names>SS</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>The fundamentals of Artificial Intelligence in medical education research: AMEE Guide No. 156.</article-title>
                    <source>

                        <italic toggle="yes">Med. Teach.</italic>
</source>
                    <year>2023</year>;<volume>45</volume>(<issue>6</issue>):<fpage>565</fpage>&#x2013;<lpage>573</lpage>.
                    <pub-id pub-id-type="pmid">36862064</pub-id>
                    <pub-id pub-id-type="doi">10.1080/0142159X.2023.2180340</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref38">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Wade</surname>
                            <given-names>L</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Harrison</surname>
                            <given-names>C</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Hollands</surname>
                            <given-names>J</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Student perceptions of the progress test in two settings and the implications for test deployment.</article-title>
                    <source>

                        <italic toggle="yes">Adv. Health Sci. Educ.</italic>
</source>
                    <year>2012</year>;<volume>17</volume>(<issue>4</issue>):<fpage>573</fpage>&#x2013;<lpage>583</lpage>.
                    <pub-id pub-id-type="pmid">22041871</pub-id>
                    <pub-id pub-id-type="doi">10.1007/s10459-011-9334-z</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref39">
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Zaidi</surname>
                            <given-names>NLB</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Grob</surname>
                            <given-names>KL</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Monrad</surname>
                            <given-names>SM</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Pushing Critical Thinking Skills with Multiple-Choice Questions: Does Bloom&#x2019;s Taxonomy Work?</article-title>
                    <source>

                        <italic toggle="yes">Acad. Med.</italic>
</source>
                    <year>2018</year>;<volume>93</volume>(<issue>6</issue>):<fpage>856</fpage>&#x2013;<lpage>859</lpage>.
                    <pub-id pub-id-type="doi">10.1097/acm.0000000000002087</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref40">
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Zhang</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Gao</surname>
                            <given-names>Y</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Suraworachet</surname>
                            <given-names>W</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Evaluating Trust in AI, Human, and Co-produced Feedback Among Undergraduate Students.</article-title>
                    <year>2025</year>.
                    <ext-link ext-link-type="uri" xlink:href="https://arxiv.org/pdf/2504.10961">Reference Source</ext-link>
                </mixed-citation>
            </ref>
        </ref-list>
    </back>
</article>
